U o a@sddlmZmZmZddlmZmZddlZddlm Z ddl m Z ddl m Z ddl m Z dd lmZdd l mZdd lmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"d!ddZ#d"ddZ$ddZ%Gddde&Z'ej(ddZ)ddZ*d#ddZ+Gdd d e,Z-dS)$)absolute_importdivisionunicode_literals)with_metaclassviewkeysN) OrderedDict) _inputstream) _tokenizer) treebuilders)Marker)_utils)spaceCharactersasciiUpper2LowerspecialElementsheadingElements cdataElementsrcdataElements tokenTypes tagTokenTypes namespaceshtmlIntegrationPointElements"mathmlTextIntegrationPointElementsadjustForeignAttributesadjustMathMLAttributesadjustSVGAttributesE_ReparseExceptionetreeTcKs$t|}t||d}|j|f|S)aParse an HTML document as a string or file-like object into a tree :arg doc: the document to parse as a string or file-like object :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5parser import parse >>> parse('

This is a doc

') namespaceHTMLElements)r getTreeBuilder HTMLParserparse)doc treebuilderr kwargstbpr)aC:\Users\vtejo\AppData\Local\Temp\pip-unpacked-wheel-6mt8ur68\pip\_vendor\html5lib\html5parser.pyr#s  r#divcKs,t|}t||d}|j|fd|i|S)a#Parse an HTML fragment as a string or file-like object into a tree :arg doc: the fragment to parse as a string or file-like object :arg container: the container context to parse the fragment in :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5libparser import parseFragment >>> parseFragment('this is a fragment') r container)r r!r" parseFragment)r$r,r%r r&r'r(r)r)r*r-2s  r-csGfdddt}|S)NcseZdZfddZdS)z-method_decorator_metaclass..Decoratedcs>|D]$\}}t|tjr$|}|||<qt||||SN)items isinstancetypes FunctionTypetype__new__)meta classnamebases classDict attributeName attributefunctionr)r*r4Ms   z5method_decorator_metaclass..Decorated.__new__N)__name__ __module__ __qualname__r4r)r;r)r* DecoratedLsr@)r3)r<r@r)r;r*method_decorator_metaclassKsrAc@seZdZdZd+ddZd,dd Zd d Zed d ZddZ ddZ ddZ ddZ ddZ ddZd-ddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*ZdS).r"z]HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. NFTcsL|_|dkrtd}||_g_tfddt|D_dS)a :arg tree: a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) :arg strict: raise an exception when a parse error is encountered :arg namespaceHTMLElements: whether or not to namespace HTML elements :arg debug: whether or not to enable debug mode which logs things Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() # generates parser with etree builder >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict Nrcs g|]\}}||jfqSr))tree).0nameclsselfr)r* zsz'HTMLParser.__init__..) strictr r!rBerrorsdict getPhasesr/phases)rGrBrIr debugr)rFr*__init__^s    zHTMLParser.__init__r+cKsh||_||_||_tj|fd|i||_|z |Wn$tk rb||YnXdS)Nparser) innerHTMLModer, scriptingr HTMLTokenizer tokenizerresetmainLoopr)rGstream innerHTMLr,rRr&r)r)r*_parse}s zHTMLParser._parsecCs|jd|_g|_g|_d|_|jr|j|_ |j t krL|j j |j _ n0|j tkrd|j j|j _ n|j dkr||j j|j _ n|jd|_|j|nd|_ |jd|_d|_d|_d|_dS)NFz no quirks plaintext beforeHtmlinitialT)rBrU firstStartTagrJlog compatModerQr,lowerrXrrT rcdataStatestater rawtextStateplaintextStaterMphaseinsertHtmlElementresetInsertionMode lastPhasebeforeRCDataPhase framesetOKrFr)r)r*rUs*         zHTMLParser.resetcCst|dsdS|jjjdjS)zName of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet rTNr)hasattrrTrW charEncodingrDrFr)r)r*documentEncodings zHTMLParser.documentEncodingcCsJ|jdkr6|jtdkr6d|jko4|jdtdkS|j|jftkSdS)Nannotation-xmlmathmlencoding)z text/htmlzapplication/xhtml+xml)rD namespacer attributes translaterrrGelementr)r)r*isHTMLIntegrationPoints    z!HTMLParser.isHTMLIntegrationPointcCs|j|jftkSr.)rqrDrrtr)r)r*isMathMLTextIntegrationPointsz'HTMLParser.isMathMLTextIntegrationPointcCshtd}td}td}td}td}td}td}|D]}d} |} | dk r| } |jjrp|jjdnd} | r~| jnd} | r| jnd} | d }||kr|| d | d id} qNt|jjd ksd| |jj ksd| | r||kr |d t ddgksd|||fksd| t dkrH| dkrH||krH|d dksd| | rl||||fkrl|j}n |jd}||kr|| } qN||kr|| } qN||kr|| } qN||kr|| } qN||kr|| } qN||krN|| } qN||kr@| dr@| ds@|dd | d iq@d}g}|rd||j|j}|r.|j|ks.tq.dS)N CharactersSpaceCharactersStartTagEndTagCommentDoctype ParseErrorr3datadatavarsrrDmglyph malignmarkrornsvginForeignContent selfClosingselfClosingAcknowledgedz&non-void-element-with-trailing-solidusT)rnormalizedTokensrB openElementsrqrD parseErrorgetlendefaultNamespacerw frozensetrrvrerMprocessCharactersprocessSpaceCharactersprocessStartTag processEndTagprocessCommentprocessDoctypeappend processEOFAssertionError)rGCharactersTokenSpaceCharactersToken StartTagToken EndTagToken CommentToken DoctypeTokenParseErrorTokentoken prev_token new_token currentNodecurrentNodeNamespacecurrentNodeNamer3re reprocessrMr)r)r*rVs                       zHTMLParser.mainLoopccs|jD]}||VqdSr.)rTnormalizeTokenrGrr)r)r*rs zHTMLParser.normalizedTokenscOs |j|ddf|||jS)aParse a HTML document into a well-formed tree :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element). :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() >>> parser.parse('

This is a doc

') FN)rYrB getDocumentrGrWargsr&r)r)r*r# szHTMLParser.parsecOs|j|df|||jS)aZParse a HTML fragment into a well-formed tree fragment :arg container: name of the element we're setting the innerHTML property if set to None, default to 'div' :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5libparser import HTMLParser >>> parser = HTMLParser() >>> parser.parseFragment('this is a fragment') T)rYrB getFragmentrr)r)r*r-$szHTMLParser.parseFragmentXXX-undefined-errorcCs@|dkr i}|j|jj||f|jrr?__doc__rOrYrUpropertyrmrvrwrVrr#r-rrrrrrrgrr)r)r)r*r"Ws(  "  C  ,r"cs"dd}dd}Gdddt|||Gddd}Gd d d }Gfd d d }Gfd dd}Gfddd}Gfddd}Gfddd} Gfddd} Gfddd} Gfddd} Gfddd} Gfddd}Gfdd d }Gfd!d"d"}Gfd#d$d$}Gfd%d&d&}Gfd'd(d(}Gfd)d*d*}Gfd+d,d,}Gfd-d.d.}Gfd/d0d0}Gfd1d2d2}Gfd3d4d4}||||||| | | | | ||||||||||||d5S)6Ncs(tddtDfdd}|S)z4Logger that records which phase processes each tokencss|]\}}||fVqdSr.r))rCkeyvaluer)r)r* sz)getPhases..log..csjdrt|dkr|d}zd|di}WnYnX|dtkr\|d|d<|jj|jjjj|jj j j|j jj|f|f||S|f||SdS)Nprocessrr3rD) r= startswithrrrPr^rrTrbre __class__)rGrr&rinfor< type_namesr)r*wrappeds    z'getPhases..log..wrapped)rKrr/)r<rr)rr*r^s zgetPhases..logcSs|r t|StSdSr.)rAr3) use_metaclassmetaclass_funcr)r)r* getMetaclassszgetPhases..getMetaclassc@sXeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS)zgetPhases..PhasezNBase class for helper object that implements each phase of processing cSs||_||_dSr.)rPrBrGrPrBr)r)r*rOsz!getPhases..Phase.__init__cSstdSr.)NotImplementedErrorrFr)r)r*rsz#getPhases..Phase.processEOFcSs|j||jjddS)NrrB insertCommentrrr)r)r*rsz'getPhases..Phase.processCommentcSs|jddS)Nzunexpected-doctyperPrrr)r)r*rsz'getPhases..Phase.processDoctypecSs|j|ddSNrrB insertTextrr)r)r*rsz*getPhases..Phase.processCharacterscSs|j|ddSrrrr)r)r*rsz/getPhases..Phase.processSpaceCharacterscSs|j|d|SNrD)startTagHandlerrr)r)r*rsz(getPhases..Phase.processStartTagcSsf|jjs |ddkr |jd|dD],\}}||jjdjkr,||jjdj|<q,d|j_dS)NrDrz non-html-rootrrF)rPr]rr/rBrrrrGrattrrr)r)r* startTagHtmls  z%getPhases..Phase.startTagHtmlcSs|j|d|Sr) endTagHandlerrr)r)r*rsz&getPhases..Phase.processEndTagN) r=r>r?rrOrrrrrrrrr)r)r)r*Phases rc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetPhases..InitialPhasecSsdSr.r)rr)r)r*rsz6getPhases..InitialPhase.processSpaceCharacterscSs|j||jjdSr.rBrdocumentrr)r)r*rsz.getPhases..InitialPhase.processCommentcSs|d}|d}|d}|d}|dks@|dk s@|dk rL|dkrL|jd|dkrXd}|j||dkrv|t}|r|ddks|d s|d ks|d r|dks|r|d krd |j_n$|ds|d r|dk rd|j_|jj d|j_ dS)NrDpublicIdsystemIdcorrectrzabout:legacy-compatzunknown-doctype)7z*+//silmaril//dtd html pro v0r11 19970101//z4-//advasoft ltd//dtd html 3.0 aswedit + extensions//z*-//as//dtd html 3.0 aswedit + extensions//z-//ietf//dtd html 2.0 level 1//z-//ietf//dtd html 2.0 level 2//z&-//ietf//dtd html 2.0 strict level 1//z&-//ietf//dtd html 2.0 strict level 2//z-//ietf//dtd html 2.0 strict//z-//ietf//dtd html 2.0//z-//ietf//dtd html 2.1e//z-//ietf//dtd html 3.0//z-//ietf//dtd html 3.2 final//z-//ietf//dtd html 3.2//z-//ietf//dtd html 3//z-//ietf//dtd html level 0//z-//ietf//dtd html level 1//z-//ietf//dtd html level 2//z-//ietf//dtd html level 3//z"-//ietf//dtd html strict level 0//z"-//ietf//dtd html strict level 1//z"-//ietf//dtd html strict level 2//z"-//ietf//dtd html strict level 3//z-//ietf//dtd html strict//z-//ietf//dtd html//z(-//metrius//dtd metrius presentational//z5-//microsoft//dtd internet explorer 2.0 html strict//z.-//microsoft//dtd internet explorer 2.0 html//z0-//microsoft//dtd internet explorer 2.0 tables//z5-//microsoft//dtd internet explorer 3.0 html strict//z.-//microsoft//dtd internet explorer 3.0 html//z0-//microsoft//dtd internet explorer 3.0 tables//z#-//netscape comm. corp.//dtd html//z*-//netscape comm. corp.//dtd strict html//z*-//o'reilly and associates//dtd html 2.0//z3-//o'reilly and associates//dtd html extended 1.0//z;-//o'reilly and associates//dtd html extended relaxed 1.0//zN-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//zE-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//z$-//spyglass//dtd html 2.0 extended//z+-//sq//dtd html 2.0 hotmetal + extensions//z--//sun microsystems corp.//dtd hotjava html//z4-//sun microsystems corp.//dtd hotjava strict html//z-//w3c//dtd html 3 1995-03-24//z-//w3c//dtd html 3.2 draft//z-//w3c//dtd html 3.2 final//z-//w3c//dtd html 3.2//z-//w3c//dtd html 3.2s draft//z-//w3c//dtd html 4.0 frameset//z#-//w3c//dtd html 4.0 transitional//z(-//w3c//dtd html experimental 19960712//z&-//w3c//dtd html experimental 970421//z-//w3c//dtd w3 html//z-//w3o//dtd w3 html 3.0//z#-//webtechs//dtd mozilla html 2.0//z-//webtechs//dtd mozilla html//)z$-//w3o//dtd w3 html strict 3.0//en//z"-/w3c/dtd html 4.0 transitional/enr)z -//w3c//dtd html 4.01 frameset//z$-//w3c//dtd html 4.01 transitional//z:http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtdquirks)z -//w3c//dtd xhtml 1.0 frameset//z$-//w3c//dtd xhtml 1.0 transitional//zlimited quirksr[) rPrrB insertDoctypersrrr`r_rMre)rGrrDrrrr)r)r*rsX   9<?@@ A z.getPhases..InitialPhase.processDoctypecSsd|j_|jjd|j_dS)Nrr[)rPr_rMrerFr)r)r* anythingElseMsz,getPhases..InitialPhase.anythingElsecSs|jd||S)Nzexpected-doctype-but-got-charsrPrrrr)r)r*rQs z1getPhases..InitialPhase.processCharacterscSs"|jdd|di||S)Nz"expected-doctype-but-got-start-tagrDrrr)r)r*rVs  z/getPhases..InitialPhase.processStartTagcSs"|jdd|di||S)Nz expected-doctype-but-got-end-tagrDrrr)r)r*r\s  z-getPhases..InitialPhase.processEndTagcSs|jd|dS)Nzexpected-doctype-but-got-eofTrrFr)r)r*rbs z*getPhases..InitialPhase.processEOFN) r=r>r?rrrrrrrrr)r)r)r* InitialPhases_rc@sDeZdZddZddZddZddZd d Zd d Zd dZ dS)z"getPhases..BeforeHtmlPhasecSs&|jtdd|jjd|j_dS)Nrrzr)rB insertRootimpliedTagTokenrPrMrerFr)r)r*rfisz4getPhases..BeforeHtmlPhase.insertHtmlElementcSs |dSNTrfrFr)r)r*rnsz-getPhases..BeforeHtmlPhase.processEOFcSs|j||jjdSr.rrr)r)r*rrsz1getPhases..BeforeHtmlPhase.processCommentcSsdSr.r)rr)r)r*rusz9getPhases..BeforeHtmlPhase.processSpaceCharacterscSs ||Sr.rrr)r)r*rxsz4getPhases..BeforeHtmlPhase.processCharacterscSs |ddkrd|j_||S)NrDrT)rPr]rfrr)r)r*r|s z2getPhases..BeforeHtmlPhase.processStartTagcSs4|ddkr$|jdd|din ||SdS)NrDrrrbrzunexpected-end-tag-before-html)rPrrfrr)r)r*rs   z0getPhases..BeforeHtmlPhase.processEndTagN) r=r>r?rfrrrrrrr)r)r)r*BeforeHtmlPhasegsrcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..BeforeHeadPhasecsV|||td|jfd|jfg|_|j|j_td|jfg|_ |j |j _dS)Nrrr) rOr MethodDispatcherr startTagHeadr startTagOtherdefaultendTagImplyHeadr endTagOtherrrr)r*rOs z+getPhases..BeforeHeadPhase.__init__cSs|tdddS)NrrzTr rrFr)r)r*rsz-getPhases..BeforeHeadPhase.processEOFcSsdSr.r)rr)r)r*rsz9getPhases..BeforeHeadPhase.processSpaceCharacterscSs|tdd|SNrrzrrr)r)r*rsz4getPhases..BeforeHeadPhase.processCharacterscSs|jjd|SNrrPrMrrr)r)r*rsz/getPhases..BeforeHeadPhase.startTagHtmlcSs0|j||jjd|j_|jjd|j_dS)NrinHead)rBrr headPointerrPrMrerr)r)r*r s z/getPhases..BeforeHeadPhase.startTagHeadcSs|tdd|Srrrr)r)r*r sz0getPhases..BeforeHeadPhase.startTagOthercSs|tdd|Srrrr)r)r*r sz2getPhases..BeforeHeadPhase.endTagImplyHeadcSs|jdd|didS)Nzend-tag-after-implied-rootrDrrr)r)r*r s z.getPhases..BeforeHeadPhase.endTagOtherN) r=r>r?rOrrrrr r r r r)rr)r*BeforeHeadPhases rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!S)"zgetPhases..InHeadPhasec s|||td|jfd|jfd|jfd|jfd|jfd|jfd|j fd|j fg|_ |j |j _ td|jfd |jfg|_|j|j_ dS) Nrtitle)noframesstylenoscriptscript)basebasefontbgsoundcommandlinkr5r)rrr)rOr rr startTagTitlestartTagNoFramesStylestartTagNoscriptstartTagScriptstartTagBaseLinkCommand startTagMetar rr r  endTagHeadendTagHtmlBodyBrrr rrr)r*rOs& z'getPhases..InHeadPhase.__init__cSs |dSrrrFr)r)r*rsz)getPhases..InHeadPhase.processEOFcSs ||Sr.r(rr)r)r*rsz0getPhases..InHeadPhase.processCharacterscSs|jjd|Srrrr)r)r*rsz+getPhases..InHeadPhase.startTagHtmlcSs|jddS)Nz!two-heads-are-not-better-than-onerrr)r)r*r sz+getPhases..InHeadPhase.startTagHeadcSs$|j||jjd|d<dSNTrrBrrpoprr)r)r*r$s  z6getPhases..InHeadPhase.startTagBaseLinkCommandcSs|j||jjd|d<|d}|jjjjddkrd|krZ|jjj|dnVd|krd|kr|d d krt |d d }t |}|}|jjj|dS) NTrrr tentativecharsetcontentz http-equivz content-typezutf-8)rBrrr+rPrTrWrlchangeEncodingr`r EncodingBytesencodeContentAttrParserr#)rGrrrrrPcodecr)r)r*r%s    z+getPhases..InHeadPhase.startTagMetacSs|j|ddS)NrrPrrr)r)r*r sz,getPhases..InHeadPhase.startTagTitlecSs|j|ddS)Nrr4rr)r)r*r!sz4getPhases..InHeadPhase.startTagNoFramesStylecSs8|jjr|j|dn|j||jjd|j_dS)NrinHeadNoscript)rPrRrrBrrMrerr)r)r*r"s z/getPhases..InHeadPhase.startTagNoscriptcSs<|j||jjj|jj_|jj|j_|jjd|j_dS)Nr) rBrrPrTscriptDataStaterbrerrMrr)r)r*r#s  z-getPhases..InHeadPhase.startTagScriptcSs ||Sr.r(rr)r)r*r  sz,getPhases..InHeadPhase.startTagOthercSs:|jjj}|jdks&td|j|jjd|j_dS)NrzExpected head got %s afterHeadrPrBrr+rDrrMrerGrrr)r)r*r&sz)getPhases..InHeadPhase.endTagHeadcSs ||Sr.r(rr)r)r*r'sz/getPhases..InHeadPhase.endTagHtmlBodyBrcSs|jdd|didSNunexpected-end-tagrDrrr)r)r*r sz*getPhases..InHeadPhase.endTagOthercSs|tddS)Nr)r&rrFr)r)r*rsz+getPhases..InHeadPhase.anythingElseN)r=r>r?rOrrrr r$r%r r!r"r#r r&r'r rr)rr)r* InHeadPhases  r<csxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z&getPhases..InHeadNoscriptPhasecsf|||td|jfd|jfd|jfg|_|j|j_td|j fd|j fg|_ |j |j _dS)Nr)rrrr5rr)rrrr) rOr rrr$startTagHeadNoscriptrr r endTagNoscriptendTagBrrr rrr)r*rO s z/getPhases..InHeadNoscriptPhase.__init__cSs|jd|dS)Nzeof-in-head-noscriptTrrFr)r)r*r0s z1getPhases..InHeadNoscriptPhase.processEOFcSs|jjd|SNr)rPrMrrr)r)r*r5sz5getPhases..InHeadNoscriptPhase.processCommentcSs|jd||S)Nzchar-in-head-noscriptrrr)r)r*r8s z8getPhases..InHeadNoscriptPhase.processCharacterscSs|jjd|Sr@rPrMrrr)r)r*r=sz=getPhases..InHeadNoscriptPhase.processSpaceCharacterscSs|jjd|Srrrr)r)r*r@sz3getPhases..InHeadNoscriptPhase.startTagHtmlcSs|jjd|Sr@rrr)r)r*r$Csz>getPhases..InHeadNoscriptPhase.startTagBaseLinkCommandcSs|jdd|didSNunexpected-start-tagrDrrr)r)r*r=Fsz;getPhases..InHeadNoscriptPhase.startTagHeadNoscriptcSs"|jdd|di||SNzunexpected-inhead-noscript-tagrDrrr)r)r*r Isz4getPhases..InHeadNoscriptPhase.startTagOthercSs:|jjj}|jdks&td|j|jjd|j_dS)NrzExpected noscript got %srr8r9r)r)r*r>Nsz5getPhases..InHeadNoscriptPhase.endTagNoscriptcSs"|jdd|di||SrDrrr)r)r*r?Ssz/getPhases..InHeadNoscriptPhase.endTagBrcSs|jdd|didSr:rrr)r)r*r Xsz2getPhases..InHeadNoscriptPhase.endTagOthercSs|tddS)Nr)r>rrFr)r)r*r[sz3getPhases..InHeadNoscriptPhase.anythingElseN)r=r>r?rOrrrrrr$r=r r>r?r rr)rr)r*InHeadNoscriptPhases rEcspeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZdS)z!getPhases..AfterHeadPhasecsn|||td|jfd|jfd|jfd|jfd|jfg|_|j |j_ td|j fg|_ |j |j _ dS)Nrrr) rrrrr5rrrrr)rrr)rOr rr startTagBodystartTagFramesetstartTagFromHeadr rr r r'rr rrr)r*rO`s  z*getPhases..AfterHeadPhase.__init__cSs |dSrr(rFr)r)r*rqsz,getPhases..AfterHeadPhase.processEOFcSs ||Sr.r(rr)r)r*rusz3getPhases..AfterHeadPhase.processCharacterscSs|jjd|Srrrr)r)r*rysz.getPhases..AfterHeadPhase.startTagHtmlcSs(d|j_|j||jjd|j_dS)NFr)rPrjrBrrMrerr)r)r*rF|s z.getPhases..AfterHeadPhase.startTagBodycSs |j||jjd|j_dS)Nr)rBrrPrMrerr)r)r*rGs z2getPhases..AfterHeadPhase.startTagFramesetcSsr|jdd|di|jj|jj|jjd||jjdddD] }|jdkrL|jj |qnqLdS)Nz#unexpected-start-tag-out-of-my-headrDrrr) rPrrBrrrrMrrDremover9r)r)r*rHs  z2getPhases..AfterHeadPhase.startTagFromHeadcSs|jdd|didSrBrrr)r)r*r sz.getPhases..AfterHeadPhase.startTagHeadcSs ||Sr.r(rr)r)r*r sz/getPhases..AfterHeadPhase.startTagOthercSs ||Sr.r(rr)r)r*r'sz2getPhases..AfterHeadPhase.endTagHtmlBodyBrcSs|jdd|didSr:rrr)r)r*r sz-getPhases..AfterHeadPhase.endTagOthercSs.|jtdd|jjd|j_d|j_dS)NrrzrT)rBrrrPrMrerjrFr)r)r*rsz.getPhases..AfterHeadPhase.anythingElseN)r=r>r?rOrrrrFrGrHr r r'r rr)rr)r*AfterHeadPhase_s  rJcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,Zd-d.Zd/d0Zd1d2Zd3d4Zd5d6Zd7d8Zd9d:Zd;d<Z d=d>Z!d?d@Z"dAdBZ#dCdDZ$dEdFZ%dGdHZ&dIdJZ'dKdLZ(dMdNZ)dOdPZ*dQdRZ+dSdTZ,dUdVZ-dWdXZ.dYdZZ/d[d\Z0d]d^Z1d_d`Z2dadbZ3dcddZ4dedfZ5dgS)hzgetPhases..InBodyPhasec$s||||j|_td|jfd|jfd|jfd|jfd|j ft |j fd|j fd|j fd|jfd |jfd |jfd |jfd |jfd |jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|jfd|j fd|j!fd|j"fd|j#fd|j$fd|j%fd |j&fg!|_'|j(|j'_)td|j*fd|j+fd!|j,fd|j-fd"|j.fd#|j/ft |j0fd$|j1fd|j2fd%|j3fg |_4|j5|j4_)dS)&Nr) rrrrrr5rrrrr)addressarticleaside blockquotecenterdetailsdirr+dlfieldset figcaptionfigurefooterheaderhgroupmainmenunavolr(sectionsummaryul)prelistingform)lidddtrZa) bbigcodeemfontissmallstrikestrongttunobrbutton)appletmarqueeobjectxmpr)arearembedimgkeygenwbr)paramsourcetrackinputhrimageisindextextareaiframer)noembedrr)rprt)optionoptgroupmathr) rcolrframerrrrrrr)rKrLrMrNrtrOrPdialogrQr+rRrSrTrUrVrWrXrarYrZr[r\r`r]r^r_r()rdrerc)rfrgrhrirjrkrlrsrmrnrorprqrrr)6rOprocessSpaceCharactersNonPrerr rrstartTagProcessInHeadrFrGstartTagClosePrstartTagHeadingstartTagPreListing startTagFormstartTagListItemstartTagPlaintext startTagAstartTagFormatting startTagNobrstartTagButtonstartTagAppletMarqueeObject startTagXmp startTagTablestartTagVoidFormattingstartTagParamSource startTagInput startTagHr startTagImagestartTagIsIndexstartTagTextareastartTagIFramer"startTagRawtextstartTagSelect startTagRpRt startTagOpt startTagMath startTagSvgstartTagMisplacedrr r  endTagBody endTagHtml endTagBlock endTagFormendTagPendTagListItem endTagHeadingendTagFormattingendTagAppletMarqueeObjectr?rr rrr)r*rOs- z'getPhases..InBodyPhase.__init__cSs$|j|jko"|j|jko"|j|jkSr.)rDrqrr)rGnode1node2r)r)r*isMatchingFormattingElements    z:getPhases..InBodyPhase.isMatchingFormattingElementcSs|j||jjd}g}|jjdddD](}|tkr@qXq.|||r.||q.t|dkshtt|dkr|jj |d|jj|dS)Nr) rBrractiveFormattingElementsr rrrrrI)rGrrumatchingElementsrr)r)r*addFormattingElements     z3getPhases..InBodyPhase.addFormattingElementcSs>td}|jjdddD]}|j|kr|jdq:qdS)N) rdrercr(rrrrrrrrrz expected-closing-tag-but-got-eof)rrBrrDrPr)rGallowed_elementsrr)r)r*rs   z)getPhases..InBodyPhase.processEOFcSsf|d}|j|_|drH|jjdjdkrH|jjdsH|dd}|rb|j|j|dS)Nr r)r`rarr) rrrrBrrD hasContent#reconstructActiveFormattingElementsr)rGrrr)r)r*!processSpaceCharactersDropNewline s   z@getPhases..InBodyPhase.processSpaceCharactersDropNewlinecSsT|ddkrdS|j|j|d|jjrPtdd|dDrPd|j_dS)NrcSsg|] }|tkqSr)rrCcharr)r)r*rH"szDgetPhases..InBodyPhase.processCharacters..F)rBrrrPrjanyrr)r)r*rs  z0getPhases..InBodyPhase.processCharacterscSs|j|j|ddSr)rBrrrr)r)r*r&s z;getPhases..InBodyPhase.processSpaceCharactersNonPrecSs|jjd|Sr@rrr)r)r*r*sz4getPhases..InBodyPhase.startTagProcessInHeadcSs|jdddit|jjdks4|jjdjdkrB|jjstnBd|j_|d D],\}}||jjdj krV||jjdj |<qVdS)NrCrDrrFr) rPrrrBrrDrXrrjr/rrrr)r)r*rF-sz+getPhases..InBodyPhase.startTagBodycSs|jdddit|jjdks4|jjdjdkrB|jjstnp|jjsLnf|jjdj rv|jjdj |jjd|jjdjdkr|jj qv|j ||jj d|j_dS) NrCrDrrrrrr)rPrrrBrrDrXrrjparent removeChildr+rrMrerr)r)r*rG8s" z/getPhases..InBodyPhase.startTagFramesetcSs.|jjdddr|td|j|dSNr(rtvariant)rBelementInScoperrrrr)r)r*rFsz-getPhases..InBodyPhase.startTagClosePcSs>|jjdddr|td|j|d|j_|j|_dS)Nr(rtrF) rBrrrrrPrjrrrr)r)r*rKs  z1getPhases..InBodyPhase.startTagPreListingcSsZ|jjr|jdddin:|jjdddr:|td|j||jjd|j_dS)NrCrDrbr(rtrr) rB formPointerrPrrrrrrrr)r)r*rRs  z+getPhases..InBodyPhase.startTagFormcSsd|j_dgddgddgd}||d}t|jjD]@}|j|kr^|jjt|jdqx|j t kr6|jdkr6qxq6|jj d d d r|jjtd d|j |dS) NFrcrerd)rcrerdrDr{)rKr+r(r(rtr) rPrjreversedrBrrDrerr nameTuplerrr)rGr stopNamesMap stopNamesrr)r)r*r[s*    z/getPhases..InBodyPhase.startTagListItemcSs>|jjdddr|td|j||jjj|jj_dSr) rBrrrrrPrTrdrbrr)r)r*rqs z0getPhases..InBodyPhase.startTagPlaintextcSsb|jjdddr|td|jjdjtkrR|jdd|di|jj |j |dS)Nr(rtrrrCrD) rBrrrrrDrrPrr+rrr)r)r*rws  z.getPhases..InBodyPhase.startTagHeadingcSs~|jd}|rf|jdddd|td||jjkrL|jj|||jjkrf|jj||j | |dS)Nrf$unexpected-start-tag-implies-end-tag startNameendName) rB!elementInActiveFormattingElementsrPrrrrrIrrr)rGr afeAElementr)r)r*rs    z(getPhases..InBodyPhase.startTagAcSs|j||dSr.)rBrrrr)r)r*rs z1getPhases..InBodyPhase.startTagFormattingcSsP|j|jdrB|jdddd|td|j||dS)Nrsrr)rBrrrPrrrrrr)r)r*rs   z+getPhases..InBodyPhase.startTagNobrcSsT|jdr2|jdddd|td|S|j|j|d|j_dS)NrtrrF) rBrrPrrrrrrjrr)r)r*rs   z-getPhases..InBodyPhase.startTagButtoncSs0|j|j||jjtd|j_dSNF)rBrrrrr rPrjrr)r)r*rs  z:getPhases..InBodyPhase.startTagAppletMarqueeObjectcSsB|jjdddr|td|jd|j_|j|ddS)Nr(rtrFr)rBrrrrrPrjrrr)r)r*rs  z*getPhases..InBodyPhase.startTagXmpcSsR|jjdkr*|jjdddr*|td|j|d|j_|jjd|j_ dS)Nrr(rtrFr) rPr_rBrrrrrjrMrerr)r)r*rs   z,getPhases..InBodyPhase.startTagTablecSs6|j|j||jjd|d<d|j_dS)NTrF)rBrrrr+rPrjrr)r)r*rs    z5getPhases..InBodyPhase.startTagVoidFormattingcSs@|jj}||d|dkr<|ddtdkr<||j_dS)Nr3rhidden)rPrjrrsr)rGrrjr)r)r*rs   z,getPhases..InBodyPhase.startTagInputcSs$|j||jjd|d<dSr)r*rr)r)r*rs  z2getPhases..InBodyPhase.startTagParamSourcecSsJ|jjdddr|td|j||jjd|d<d|j_dS)Nr(rtrTrF) rBrrrrrr+rPrjrr)r)r*rs   z)getPhases..InBodyPhase.startTagHrcSs6|jdddd|tdd|d|dddS) Nzunexpected-start-tag-treated-asrr{ originalNamenewNamerzrrrrr)rPrrrrr)r)r*rs z,getPhases..InBodyPhase.startTagImagecSs|jdddi|jjrdSi}d|dkr>|dd|d<|tdd|d|td d|td dd |dkr|dd }nd }|td |d|d}d|kr|d=d |kr|d =d|d<|tdd||dd| td |td d| tddS)Nzdeprecated-tagrDractionrrbrz)rrrlabelpromptz3This is a searchable index. Enter search keywords: rxr3rrrr) rPrrBrrrrrcopyr)rGr form_attrsrrrr)r)r*rs<      z.getPhases..InBodyPhase.startTagIsIndexcSs0|j||jjj|jj_|j|_d|j_dSr) rBrrPrTrarbrrrjrr)r)r*rs z/getPhases..InBodyPhase.startTagTextareacSsd|j_||dSr)rPrjrrr)r)r*rsz-getPhases..InBodyPhase.startTagIFramecSs"|jjr||n ||dSr.)rPrRrr rr)r)r*r"s z/getPhases..InBodyPhase.startTagNoscriptcSs|j|ddS)z8iframe, noembed noframes, noscript(if scripting enabled)rNr4rr)r)r*r sz.getPhases..InBodyPhase.startTagRawtextcSs@|jjdjdkr$|jjtd|j|jj|dSNrr) rBrrDrPrerrrrrr)r)r*rs z*getPhases..InBodyPhase.startTagOptcSs|j|j|d|j_|jj|jjd|jjd|jjd|jjd|jjd|jjdfkrx|jjd|j_n|jjd |j_dS) NFrrrrrrinSelectInTabler)rBrrrPrjrerMrr)r)r*rs       z-getPhases..InBodyPhase.startTagSelectcSsB|jdr2|j|jjdjdkr2|j|j|dS)Nrubyr)rBrgenerateImpliedEndTagsrrDrPrrrr)r)r*r%s    z+getPhases..InBodyPhase.startTagRpRtcSsZ|j|j||j|td|d<|j||drV|jjd|d<dS)NrorqrTr) rBrrPrrrrrr+rr)r)r*r,s      z+getPhases..InBodyPhase.startTagMathcSsZ|j|j||j|td|d<|j||drV|jjd|d<dS)NrrqrTr) rBrrPrrrrrr+rr)r)r*r8s      z*getPhases..InBodyPhase.startTagSvgcSs|jdd|didS)a5 Elements that should be children of other elements that have a different insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "noscript" zunexpected-start-tag-ignoredrDNrrr)r)r*rDsz0getPhases..InBodyPhase.startTagMisplacedcSs|j|j|dSr.)rBrrrr)r)r*r Ms z,getPhases..InBodyPhase.startTagOthercSs|jjdddsD|tdd|jdddi|tddnT|jd|jjdj dkrt|jdddi|jj }|j dkr|jj }qdS) Nr(rtrrzr;rDr{r) rBrrrrPrrrrrDr+r9r)r)r*rQs   z&getPhases..InBodyPhase.endTagPcSs~|jds|jdS|jjdjdkrj|jjddD],}|jtdkr<|jdd|jdqjq<|jjd|j_dS)Nrr)rdrercrrr(rrrrrrrrrr$expected-one-end-tag-but-got-anothergotName expectedName afterBody) rBrrPrrrDrrMrer9r)r)r*r^s   z)getPhases..InBodyPhase.endTagBodycSs"|jdr|td|SdS)Nr)rBrrrrr)r)r*rps z)getPhases..InBodyPhase.endTagHtmlcSs|ddkr|j|_|j|d}|r2|j|jjdj|dkr^|jdd|di|r|jj }|j|dkr|jj }qndS)NrDr`rend-tag-too-early) rrrBrrrrDrPrr+)rGrinScoperr)r)r*rvs   z*getPhases..InBodyPhase.endTagBlockcSsv|jj}d|j_|dks$|j|s8|jdddin:|j|jjd|krd|jdddi|jj|dS)Nr;rDrbrzend-tag-too-early-ignored)rBrrrPrrrrIr9r)r)r*rs z)getPhases..InBodyPhase.endTagFormcSs|ddkrd}nd}|jj|d|dsB|jdd|dinf|jj|dd|jjdj|dkr|jdd|di|jj}|j|dkr|jj}qdS) NrDrclistrr;excluderr)rBrrPrrrrDr+)rGrrrr)r)r*rs   z-getPhases..InBodyPhase.endTagListItemcSstD]}|j|r|jq$q|jjdj|dkrP|jdd|ditD]8}|j|rT|jj}|jtkr|jj}qpqqTdS)NrrDr) rrBrrrrDrPrr+)rGritemr)r)r*rs     z,getPhases..InBodyPhase.endTagHeadingcSsd}|dkr |d7}|j|d}|rD||jjkrR|j|jsR||dS||jjkr|jdd|di|jj |dS|j|js|jdd|didS||jjdkr|jd d|di|jj |}d}|jj|dD]}|j t kr|}qq|dkrR|jj }||kr@|jj }q&|jj |dS|jj|d}|jj |}|} } d} |jj | } | d krX| d7} | d8} |jj| } | |jjkr|jj | q| |krސqX| |kr|jj | d}| } | |jj|jj | <| |jj|jj | <| } | jrF| j| | | | } q| jrl| j| |jtd kr|j\}}|| |n || |} || || |jj ||jj|| |jj ||jj|jj |d| qdS) z)The much-feared adoption agency algorithmrrrDNzadoption-agency-1.2zadoption-agency-4.4rzadoption-agency-1.3rrrrrr)rBrrrrDr rPrrrIindexrrr+ cloneNoderr appendChildrgetTableMisnestedNodePosition insertBeforereparentChildreninsert)rGrouterLoopCounterformattingElementafeIndex furthestBlockrucommonAncestorbookmarklastNoderinnerLoopCounterrclonerrr)r)r*rs                      z/getPhases..InBodyPhase.endTagFormattingcSs|j|dr|j|jjdj|dkrF|jdd|di|j|dr|jj}|j|dkr~|jj}qb|jdS)NrDrr) rBrrrrDrPrr+clearActiveFormattingElements)rGrrur)r)r*rTs  z8getPhases..InBodyPhase.endTagAppletMarqueeObjectcSs@|jdddd|j|jtdd|jjdS)Nzunexpected-end-tag-treated-asrz br elementrrz)rPrrBrrrrr+rr)r)r*r?`s  z'getPhases..InBodyPhase.endTagBrcSs|jjdddD]}|j|dkrz|jj|dd|jjdj|dkrb|jdd|di|jj|krtqbqq|jtkr|jdd|diqqdS)NrrDrr;) rBrrDrrPrr+rrr9r)r)r*r gs z*getPhases..InBodyPhase.endTagOtherN)6r=r>r?rOrrrrrrrrFrGrrrrrrrrrrrrrrrrrrrrrr"rrrrrrrr rrrrrrrrrr?r r)rr)r* InBodyPhasesh G             $ rcs@eZdZfddZddZddZddZd d Zd d Zd S)zgetPhases..TextPhasecsF|||tg|_|j|j_td|jfg|_|j|j_dSNr) rOr rrr r  endTagScriptrr rrr)r*rOvs  z%getPhases..TextPhase.__init__cSs|j|ddSrrrr)r)r*r~sz.getPhases..TextPhase.processCharacterscSs8|jdd|jjdji|jj|jj|j_dS)Nz&expected-named-closing-tag-but-got-eofrDrT)rPrrBrrDr+rrerFr)r)r*rs   z'getPhases..TextPhase.processEOFcSsdstd|ddS)NFz4Tried to process start tag %s in RCDATA/RAWTEXT moderD)rrr)r)r*r sz*getPhases..TextPhase.startTagOthercSs*|jj}|jdkst|jj|j_dSr )rBrr+rDrrPrrer9r)r)r*r s z)getPhases..TextPhase.endTagScriptcSs|jj|jj|j_dSr.)rBrr+rPrrerr)r)r*r s z(getPhases..TextPhase.endTagOtherN) r=r>r?rOrrr r r r)rr)r* TextPhaseus  r cseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'S)(zgetPhases..InTablePhasec s|||td|jfd|jfd|jfd|jfd|jfd|jfd|j fd|j fd |j fd |j fg |_ |j|j _td|jfd |jfg|_|j|j_dS) Nrrrrrrr)rrrr)rrrrb) rrrrrrrrrrr)rOr rrstartTagCaptionstartTagColgroup startTagColstartTagRowGroupstartTagImplyTbodyrstartTagStyleScriptrrrr r  endTagTable endTagIgnorerr rrr)r*rOs* z(getPhases..InTablePhase.__init__cSs$|jjdjdkr |jjqdS)Nr)rr)rBrrDr+rFr)r)r*clearStackToTableContextsz8getPhases..InTablePhase.clearStackToTableContextcSs0|jjdjdkr |jdn |jjs,tdS)Nrrz eof-in-tablerBrrDrPrrXrrFr)r)r*rsz*getPhases..InTablePhase.processEOFcSs4|jj}|jjd|j_||jj_|jj|dSN inTableText)rPrerMrrrGrrr)r)r*rs z6getPhases..InTablePhase.processSpaceCharacterscSs4|jj}|jjd|j_||jj_|jj|dSr)rPrerMrrrr)r)r*rs z1getPhases..InTablePhase.processCharacterscSs&d|j_|jjd|d|j_dS)NTrF)rBinsertFromTablerPrMrrr)r)r*rsz*getPhases..InTablePhase.insertTextcSs6||jjt|j||jjd|j_dS)Nr) rrBrrr rrPrMrerr)r)r*r s z/getPhases..InTablePhase.startTagCaptioncSs(||j||jjd|j_dS)NrrrBrrPrMrerr)r)r*rs z0getPhases..InTablePhase.startTagColgroupcSs|tdd|S)Nrrz)rrrr)r)r*rsz+getPhases..InTablePhase.startTagColcSs(||j||jjd|j_dSNrrrr)r)r*rs z0getPhases..InTablePhase.startTagRowGroupcSs|tdd|S)Nrrz)rrrr)r)r*rsz2getPhases..InTablePhase.startTagImplyTbodycSs6|jdddd|jjtd|jjs2|SdS)Nrrr)rPrrerrrXrr)r)r*rs z-getPhases..InTablePhase.startTagTablecSs|jjd|Sr@rrr)r)r*rsz3getPhases..InTablePhase.startTagStyleScriptcSsVd|dkrH|ddtdkrH|jd|j||jjn ||dS)Nr3rrz unexpected-hidden-input-in-table) rsrrPrrBrrr+r rr)r)r*rs   z-getPhases..InTablePhase.startTagInputcSsD|jd|jjdkr@|j||jjd|j_|jjdS)Nzunexpected-form-in-tabler)rPrrBrrrr+rr)r)r*rs    z,getPhases..InTablePhase.startTagFormcSs<|jdd|did|j_|jjd|d|j_dS)Nz)unexpected-start-tag-implies-table-voodoorDTrF)rPrrBrrMrrr)r)r*r sz-getPhases..InTablePhase.startTagOthercSs|jjdddr|j|jjdjdkrJ|jdd|jjdjd|jjdjdkrj|jjqJ|jj|jn|jj st |jdS)Nrrrzend-tag-too-early-namedr) rBrrrrDrPrr+rgrXrrr)r)r*r s     z+getPhases..InTablePhase.endTagTablecSs|jdd|didSr:rrr)r)r*rsz,getPhases..InTablePhase.endTagIgnorecSs<|jdd|did|j_|jjd|d|j_dS)Nz'unexpected-end-tag-implies-table-voodoorDTrF)rPrrBrrMrrr)r)r*r  sz+getPhases..InTablePhase.endTagOtherN)r=r>r?rOrrrrrr rrrrrrrrr rrr r)rr)r* InTablePhases&   rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z#getPhases..InTableTextPhasecs|||d|_g|_dSr.)rOrcharacterTokensrrr)r*rO(sz,getPhases..InTableTextPhase.__init__cSsdddd|jD}tdd|DrJtd|d}|jjd|n|rZ|j|g|_dS)NrcSsg|] }|dqS)rr)rCrr)r)r*rH.szGgetPhases..InTableTextPhase.flushCharacters..cSsg|] }|tkqSr)rrr)r)r*rH/srxrr)joinrrrrPrMrrB)rGrrr)r)r*flushCharacters-s z3getPhases..InTableTextPhase.flushCharacterscSs||j|j_|Sr.r!rrPrerr)r)r*r6s z2getPhases..InTableTextPhase.processCommentcSs||j|j_dSrr"rFr)r)r*r;s z.getPhases..InTableTextPhase.processEOFcSs |ddkrdS|j|dSNrrrrrr)r)r*r@s z5getPhases..InTableTextPhase.processCharacterscSs|j|dSr.r$rr)r)r*rEsz:getPhases..InTableTextPhase.processSpaceCharacterscSs||j|j_|Sr.r"rr)r)r*rJs z3getPhases..InTableTextPhase.processStartTagcSs||j|j_|Sr.r"rr)r)r*rOs z1getPhases..InTableTextPhase.processEndTagN) r=r>r?rOr!rrrrrrr)rr)r*InTableTextPhase's  r%cs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)z!getPhases..InCaptionPhasecsf|||td|jfd|jfg|_|j|j_td|jfd|j fd|j fg|_ |j |j _dS)Nr rrrrrrrrrrr) rrrrrrrrrr) rOr rrstartTagTableElementrr r  endTagCaptionrrrr rrr)r*rOVs  z*getPhases..InCaptionPhase.__init__cSs|jjddd S)NrrrrBrrFr)r)r*ignoreEndTagCaptionhsz5getPhases..InCaptionPhase.ignoreEndTagCaptioncSs|jjddSrrPrMrrFr)r)r*rksz,getPhases..InCaptionPhase.processEOFcSs|jjd|SrrPrMrrr)r)r*rnsz3getPhases..InCaptionPhase.processCharacterscSs0|j|}|jjtd|s,|SdSNrrPrr*rerrrGr ignoreEndTagr)r)r*r'qs  z6getPhases..InCaptionPhase.startTagTableElementcSs|jjd|Srrrr)r)r*r ysz/getPhases..InCaptionPhase.startTagOthercSs|s|j|jjdjdkrB|jdd|jjdjd|jjdjdkrb|jjqB|jj|j|jj d|j_ n|jj st |jdS)Nrrrrr) r*rBrrrDrPrr+rrMrerXrrr)r)r*r(|s     z/getPhases..InCaptionPhase.endTagCaptioncSs0|j|}|jjtd|s,|SdSr-r.r/r)r)r*rs  z-getPhases..InCaptionPhase.endTagTablecSs|jdd|didSr:rrr)r)r*rsz.getPhases..InCaptionPhase.endTagIgnorecSs|jjd|SrrPrMrrr)r)r*r sz-getPhases..InCaptionPhase.endTagOtherN) r=r>r?rOr*rrr'r r(rrr r)rr)r*InCaptionPhaseTs r2csXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z%getPhases..InColumnGroupPhasecs^|||td|jfd|jfg|_|j|j_td|jfd|j fg|_ |j |j _dS)Nrrr) rOr rrrrr r endTagColgroup endTagColrr rrr)r*rOs z.getPhases..InColumnGroupPhase.__init__cSs|jjdjdkS)Nrr)rBrrDrFr)r)r*ignoreEndTagColgroupsz:getPhases..InColumnGroupPhase.ignoreEndTagColgroupcSsD|jjdjdkr"|jjstdS|}|td|s@dSdS)NrrrT) rBrrDrPrXrr5r3r)rGr0r)r)r*rs z0getPhases..InColumnGroupPhase.processEOFcSs"|}|td|s|SdSNrr5r3rr/r)r)r*rsz7getPhases..InColumnGroupPhase.processCharacterscSs$|j||jjd|d<dSr)r*rr)r)r*rs  z1getPhases..InColumnGroupPhase.startTagColcSs"|}|td|s|SdSr6r7r/r)r)r*r sz3getPhases..InColumnGroupPhase.startTagOthercSs@|r |jjst|jn|jj|jjd|j_ dSNr) r5rPrXrrrBrr+rMrerr)r)r*r3s    z4getPhases..InColumnGroupPhase.endTagColgroupcSs|jdddidS)Nz no-end-tagrDrrrr)r)r*r4sz/getPhases..InColumnGroupPhase.endTagColcSs"|}|td|s|SdSr6r7r/r)r)r*r sz1getPhases..InColumnGroupPhase.endTagOtherN) r=r>r?rOr5rrrr r3r4r r)rr)r*InColumnGroupPhases   r9csxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z#getPhases..InTableBodyPhasecsv|||td|jfd|jfd|jfd|jfg|_|j|j_ td|j fd|j fd|j fg|_ |j|j _ dS)Nrrrr)rrrrrrr r)rrrrrrrr)rOr rr startTagTrstartTagTableCellstartTagTableOtherrr r endTagTableRowGrouprrrr rrr)r*rOs$ z,getPhases..InTableBodyPhase.__init__cSsB|jjdjdkr |jjq|jjdjdkr>|jjs>tdS)Nr)rrrrr)rBrrDr+rPrXrrFr)r)r*clearStackToTableBodyContextsz@getPhases..InTableBodyPhase.clearStackToTableBodyContextcSs|jjddSr8r+rFr)r)r*rsz.getPhases..InTableBodyPhase.processEOFcSs|jjd|Sr8rArr)r)r*rsz:getPhases..InTableBodyPhase.processSpaceCharacterscSs|jjd|Sr8r,rr)r)r*rsz5getPhases..InTableBodyPhase.processCharacterscSs(||j||jjd|j_dS)Nr)r?rBrrPrMrerr)r)r*r;s z.getPhases..InTableBodyPhase.startTagTrcSs*|jdd|di|tdd|S)Nzunexpected-cell-in-table-bodyrDrrz)rPrr;rrr)r)r*r< s  z5getPhases..InTableBodyPhase.startTagTableCellcSsn|jjddds0|jjddds0|jjdddrT||t|jjdj|S|jjs`t |j dSNrrrrrr rBrr?r>rrrDrPrXrrrr)r)r*r=s z6getPhases..InTableBodyPhase.startTagTableOthercSs|jjd|Sr8rrr)r)r*r sz1getPhases..InTableBodyPhase.startTagOthercSsT|jj|dddr:||jj|jjd|j_n|jdd|didS)NrDrrr unexpected-end-tag-in-table-body) rBrr?rr+rPrMrerrr)r)r*r>"s  z7getPhases..InTableBodyPhase.endTagTableRowGroupcSsn|jjddds0|jjddds0|jjdddrT||t|jjdj|S|jjs`t |j dSr@rArr)r)r*r+s z/getPhases..InTableBodyPhase.endTagTablecSs|jdd|didS)NrBrDrrr)r)r*r8s z0getPhases..InTableBodyPhase.endTagIgnorecSs|jjd|Sr8r1rr)r)r*r <sz/getPhases..InTableBodyPhase.endTagOtherN)r=r>r?rOr?rrrr;r<r=r r>rrr r)rr)r*InTableBodyPhases    rCcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZdS)zgetPhases..InRowPhasecsv|||td|jfd|jfd|jfg|_|j|j_td|j fd|j fd|j fd|j fg|_ |j|j _dS)Nrr:)rrrrrrrrrr )rrrrrrr)rOr rrr<r=rr r endTagTrrr>rrr rrr)r*rOAs$ z&getPhases..InRowPhase.__init__cSs@|jjdjdkr<|jdd|jjdji|jjqdS)Nr)rrz'unexpected-implied-end-tag-in-table-rowrD)rBrrDrPrr+rFr)r)r*clearStackToTableRowContextUs z9getPhases..InRowPhase.clearStackToTableRowContextcSs|jjddd S)Nrrrr)rFr)r)r*ignoreEndTagTr[sz,getPhases..InRowPhase.ignoreEndTagTrcSs|jjddSr8r+rFr)r)r*r_sz(getPhases..InRowPhase.processEOFcSs|jjd|Sr8rArr)r)r*rbsz4getPhases..InRowPhase.processSpaceCharacterscSs|jjd|Sr8r,rr)r)r*resz/getPhases..InRowPhase.processCharacterscSs6||j||jjd|j_|jjtdS)Nr) rErBrrPrMrerrr rr)r)r*r<hs z/getPhases..InRowPhase.startTagTableCellcSs"|}|td|s|SdSNrrFrDrr/r)r)r*r=nsz0getPhases..InRowPhase.startTagTableOthercSs|jjd|Sr8rrr)r)r*r usz+getPhases..InRowPhase.startTagOthercSsH|s.||jj|jjd|j_n|jjs:t |j dSr) rFrErBrr+rPrMrerXrrrr)r)r*rDxs   z&getPhases..InRowPhase.endTagTrcSs"|}|td|s|SdSrGrHr/r)r)r*rsz)getPhases..InRowPhase.endTagTablecSs4|jj|dddr&|td|S|jdS)NrDrrr)rBrrDrrPrrr)r)r*r>sz1getPhases..InRowPhase.endTagTableRowGroupcSs|jdd|didS)Nzunexpected-end-tag-in-table-rowrDrrr)r)r*rs z*getPhases..InRowPhase.endTagIgnorecSs|jjd|Sr8r1rr)r)r*r sz)getPhases..InRowPhase.endTagOtherN)r=r>r?rOrErFrrrr<r=r rDrr>rr r)rr)r* InRowPhase?s  rIcs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)zgetPhases..InCellPhasecsf|||td|jfd|jfg|_|j|j_td|jfd|j fd|j fg|_ |j |j _dS)Nrr&r:)rrrrrr) rOr rrr=rr r endTagTableCellr endTagImplyrr rrr)r*rOs z'getPhases..InCellPhase.__init__cSsB|jjdddr |tdn|jjdddr>|tddSNrrrr)rBrrJrrFr)r)r* closeCellsz(getPhases..InCellPhase.closeCellcSs|jjddSrr+rFr)r)r*rsz)getPhases..InCellPhase.processEOFcSs|jjd|Srr,rr)r)r*rsz0getPhases..InCellPhase.processCharacterscSsF|jjddds |jjdddr,||S|jjs8t|jdSrL)rBrrMrPrXrrrr)r)r*r=s z1getPhases..InCellPhase.startTagTableOthercSs|jjd|Srrrr)r)r*r sz,getPhases..InCellPhase.startTagOthercSs|jj|dddr|j|d|jjdj|dkrp|jdd|di|jj}|j|dkrPq|qPn |jj|j|jj d|j_ n|jdd|didS)NrDrrrzunexpected-cell-end-tagrr;) rBrrrrDrPrr+rrMrer9r)r)r*rJs    z.getPhases..InCellPhase.endTagTableCellcSs|jdd|didSr:rrr)r)r*rsz+getPhases..InCellPhase.endTagIgnorecSs.|jj|dddr ||S|jdS)NrDrr)rBrrMrPrrr)r)r*rKsz*getPhases..InCellPhase.endTagImplycSs|jjd|Srr1rr)r)r*r sz*getPhases..InCellPhase.endTagOtherN) r=r>r?rOrMrrr=r rJrrKr r)rr)r* InCellPhases  rNcsxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z getPhases..InSelectPhasec s|||td|jfd|jfd|jfd|jfd|jfd|jfg|_ |j |j _ td|j fd|j fd|jfg|_|j|j_ dS)Nrrrr)rr|rr)rOr rrstartTagOptionstartTagOptgrouprrr#rr r  endTagOptionendTagOptgroup endTagSelectrr rrr)r*rOs  z)getPhases..InSelectPhase.__init__cSs0|jjdjdkr |jdn |jjs,tdS)Nrrz eof-in-selectrrFr)r)r*rsz+getPhases..InSelectPhase.processEOFcSs$|ddkrdS|j|ddSr#rrr)r)r*r s z2getPhases..InSelectPhase.processCharacterscSs.|jjdjdkr|jj|j|dSrrBrrDr+rrr)r)r*rO s z/getPhases..InSelectPhase.startTagOptioncSsL|jjdjdkr|jj|jjdjdkr<|jj|j|dS)NrrrrTrr)r)r*rP s   z1getPhases..InSelectPhase.startTagOptgroupcSs|jd|tddS)Nzunexpected-select-in-selectr)rPrrSrrr)r)r*r s z/getPhases..InSelectPhase.startTagSelectcSs>|jd|jjdddr.|td|S|jjs:tdS)Nzunexpected-input-in-selectrr)rPrrBrrSrrXrrr)r)r*r s  z.getPhases..InSelectPhase.startTagInputcSs|jjd|Sr@rrr)r)r*r# sz/getPhases..InSelectPhase.startTagScriptcSs|jdd|didS)Nzunexpected-start-tag-in-selectrDrrr)r)r*r ! s z.getPhases..InSelectPhase.startTagOthercSs6|jjdjdkr |jjn|jdddidS)Nrrunexpected-end-tag-in-selectrDrBrrDr+rPrrr)r)r*rQ% s z-getPhases..InSelectPhase.endTagOptioncSsf|jjdjdkr0|jjdjdkr0|jj|jjdjdkrP|jjn|jdddidS)NrrrrUrDrVrr)r)r*rR, s z/getPhases..InSelectPhase.endTagOptgroupcSsZ|jjdddr@|jj}|jdkr4|jj}q|jn|jjsLt|j dS)Nrr) rBrrr+rDrPrgrXrrr9r)r)r*rS9 s    z-getPhases..InSelectPhase.endTagSelectcSs|jdd|didS)NrUrDrrr)r)r*r D s z,getPhases..InSelectPhase.endTagOtherN)r=r>r?rOrrrOrPrrr#r rQrRrSr r)rr)r* InSelectPhases   rXcsHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z'getPhases..InSelectInTablePhasecsN|||td|jfg|_|j|j_td|jfg|_|j |j_dS)N)rrrrrrrr) rOr rrrr r rrr rrr)r*rOI s z0getPhases..InSelectInTablePhase.__init__cSs|jjddSNrr+rFr)r)r*rX sz2getPhases..InSelectInTablePhase.processEOFcSs|jjd|SrYr,rr)r)r*r[ sz9getPhases..InSelectInTablePhase.processCharacterscSs(|jdd|di|td|S)Nz5unexpected-table-element-start-tag-in-select-in-tablerDr)rPrr rrr)r)r*r^ sz5getPhases..InSelectInTablePhase.startTagTablecSs|jjd|SrYrrr)r)r*r c sz5getPhases..InSelectInTablePhase.startTagOthercSs@|jdd|di|jj|dddr<|td|SdS)Nz3unexpected-table-element-end-tag-in-select-in-tablerDrrr)rPrrBrr rrr)r)r*rf sz3getPhases..InSelectInTablePhase.endTagTablecSs|jjd|SrYr1rr)r)r*r l sz3getPhases..InSelectInTablePhase.endTagOtherN) r=r>r?rOrrrr rr r)rr)r*InSelectInTablePhaseH s rZc-seZdZeddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,g,Zfd-d.Zd/d0Zfd1d2Zd3d4Zd5d6Z d7S)8z(getPhases..InForeignContentPhasergrhrNrrrOrirdr+rRrerjrzh1h2h3h4h5h6rrrlr{rcrarZr5rsr\r(r`rrmrnspanrprosubsuprrqrrr_varcs|||dSr.)rOrrr)r*rOy sz1getPhases..InForeignContentPhase.__init__c%Ssnddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%$}|d&|krj||d&|d&<dS)'NaltGlyph altGlyphDef altGlyphItem animateColor animateMotionanimateTransformclipPathfeBlend feColorMatrixfeComponentTransfer feCompositefeConvolveMatrixfeDiffuseLightingfeDisplacementMapfeDistantLightfeFloodfeFuncAfeFuncBfeFuncGfeFuncRfeGaussianBlurfeImagefeMerge feMergeNode feMorphologyfeOffset fePointLightfeSpecularLighting feSpotLightfeTile feTurbulence foreignObjectglyphReflinearGradientradialGradienttextPath)$altglyph altglyphdef altglyphitem animatecolor animatemotionanimatetransformclippathfeblend fecolormatrixfecomponenttransfer fecompositefeconvolvematrixfediffuselightingfedisplacementmapfedistantlightfefloodfefuncafefuncbfefuncgfefuncrfegaussianblurfeimagefemerge femergenode femorphologyfeoffset fepointlightfespecularlighting fespotlightfetile feturbulence foreignobjectglyphreflineargradientradialgradienttextpathrDr))rGr replacementsr)r)r*adjustSVGTagNames| sN% z:getPhases..InForeignContentPhase.adjustSVGTagNamescsL|ddkrd|d<n&|jjr.InForeignContentPhase.processCharacters..F)rPrjrrrrr)r*r s  z:getPhases..InForeignContentPhase.processCharacterscSs.|jjd}|d|jksD|ddkrt|dtdddg@r|jdd|di|jjdj|jjkr|j |jjds|j |jjds|jj qZ|S|jt d kr|j |n$|jt d kr|||j||j||j|d <|j||d r*|jj d |d<dS)NrrDrkrcolorfacesizez*unexpected-html-element-in-foreign-contentrorrqrTr)rBrbreakoutElementssetkeysrPrrqrrvrwr+rrrrrr)rGrrr)r)r*r s:           z8getPhases..InForeignContentPhase.processStartTagcSst|jjd}|jjd}|jt|dkrF|jdd|di|jt|dkr|jj|jj dkr|jj |jjj |j_|jj |kr|jjst qd}q|d8}|jj|}|j|jjkrqFqF|jj|}qqF|S)NrrrDr;r)rrBrrDrsrrPrrerMr!rr+rrqrr)rGr nodeIndexrrr)r)r*r s&   z6getPhases..InForeignContentPhase.processEndTagN) r=r>r?rrrOrrrrr)rr)r*InForeignContentPhaseo s\  ) rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z!getPhases..AfterBodyPhasecsN|||td|jfg|_|j|j_td|jfg|_|j |j_dSNr) rOr rrrr r rrr rrr)r*rO s z*getPhases..AfterBodyPhase.__init__cSsdSr.r)rFr)r)r*r sz,getPhases..AfterBodyPhase.processEOFcSs|j||jjddS)Nrrrr)r)r*r sz0getPhases..AfterBodyPhase.processCommentcSs |jd|jjd|j_|S)Nzunexpected-char-after-bodyrrPrrMrerr)r)r*r s z3getPhases..AfterBodyPhase.processCharacterscSs|jjd|Srrrr)r)r*r sz.getPhases..AfterBodyPhase.startTagHtmlcSs*|jdd|di|jjd|j_|S)Nzunexpected-start-tag-after-bodyrDrrrr)r)r*r  s  z/getPhases..AfterBodyPhase.startTagOthercSs*|jjr|jdn|jjd|j_dS)Nz'unexpected-end-tag-after-body-innerhtmlafterAfterBody)rPrXrrMre)rGrDr)r)r*r sz,getPhases..AfterBodyPhase.endTagHtmlcSs*|jdd|di|jjd|j_|S)Nzunexpected-end-tag-after-bodyrDrrrr)r)r*r  s  z-getPhases..AfterBodyPhase.endTagOtherN) r=r>r?rOrrrrr rr r)rr)r*AfterBodyPhase s rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..InFramesetPhasecsf|||td|jfd|jfd|jfd|jfg|_|j|j_ td|j fg|_ |j |j _ dS)Nrrrr) rOr rrrG startTagFramestartTagNoframesrr r endTagFramesetrr rrr)r*rO s z+getPhases..InFramesetPhase.__init__cSs0|jjdjdkr |jdn |jjs,tdS)Nrrzeof-in-framesetrrFr)r)r*r# sz-getPhases..InFramesetPhase.processEOFcSs|jddS)Nzunexpected-char-in-framesetrrr)r)r*r) sz4getPhases..InFramesetPhase.processCharacterscSs|j|dSr.)rBrrr)r)r*rG, sz3getPhases..InFramesetPhase.startTagFramesetcSs|j||jjdSr.r*rr)r)r*r/ s z0getPhases..InFramesetPhase.startTagFramecSs|jjd|Srrrr)r)r*r3 sz3getPhases..InFramesetPhase.startTagNoframescSs|jdd|didS)Nz unexpected-start-tag-in-framesetrDrrr)r)r*r 6 s z0getPhases..InFramesetPhase.startTagOthercSsZ|jjdjdkr |jdn |jj|jjsV|jjdjdkrV|jjd|j_dS)Nrrz)unexpected-frameset-in-frameset-innerhtmlr afterFrameset) rBrrDrPrr+rXrMrerr)r)r*r: s z1getPhases..InFramesetPhase.endTagFramesetcSs|jdd|didS)Nzunexpected-end-tag-in-framesetrDrrr)r)r*r F s z.getPhases..InFramesetPhase.endTagOtherN) r=r>r?rOrrrGrrr rr r)rr)r*InFramesetPhase s  rcsHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z%getPhases..AfterFramesetPhasecsV|||td|jfd|jfg|_|j|j_td|jfg|_ |j |j _dSNrr) rOr rrrrr r rrr rrr)r*rOL s z.getPhases..AfterFramesetPhase.__init__cSsdSr.r)rFr)r)r*rZ sz0getPhases..AfterFramesetPhase.processEOFcSs|jddS)Nzunexpected-char-after-framesetrrr)r)r*r^ sz7getPhases..AfterFramesetPhase.processCharacterscSs|jjd|Sr@rrr)r)r*ra sz6getPhases..AfterFramesetPhase.startTagNoframescSs|jdd|didS)Nz#unexpected-start-tag-after-framesetrDrrr)r)r*r d s z3getPhases..AfterFramesetPhase.startTagOthercSs|jjd|j_dS)NafterAfterFrameset)rPrMrerr)r)r*rh sz0getPhases..AfterFramesetPhase.endTagHtmlcSs|jdd|didS)Nz!unexpected-end-tag-after-framesetrDrrr)r)r*r k s z1getPhases..AfterFramesetPhase.endTagOtherN) r=r>r?rOrrrr rr r)rr)r*AfterFramesetPhaseJ s rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z&getPhases..AfterAfterBodyPhasecs0|||td|jfg|_|j|j_dSr)rOr rrrr r rrr)r*rOp s z/getPhases..AfterAfterBodyPhase.__init__cSsdSr.r)rFr)r)r*rx sz1getPhases..AfterAfterBodyPhase.processEOFcSs|j||jjdSr.rrr)r)r*r{ sz5getPhases..AfterAfterBodyPhase.processCommentcSs|jjd|SrrArr)r)r*r~ sz=getPhases..AfterAfterBodyPhase.processSpaceCharacterscSs |jd|jjd|j_|S)Nexpected-eof-but-got-charrrrr)r)r*r s z8getPhases..AfterAfterBodyPhase.processCharacterscSs|jjd|Srrrr)r)r*r sz3getPhases..AfterAfterBodyPhase.startTagHtmlcSs*|jdd|di|jjd|j_|S)Nexpected-eof-but-got-start-tagrDrrrr)r)r*r  s  z4getPhases..AfterAfterBodyPhase.startTagOthercSs*|jdd|di|jjd|j_|S)Nexpected-eof-but-got-end-tagrDrrrr)r)r*r s  z4getPhases..AfterAfterBodyPhase.processEndTagN) r=r>r?rOrrrrrr rr)rr)r*AfterAfterBodyPhaseo s rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z*getPhases..AfterAfterFramesetPhasecs8|||td|jfd|jfg|_|j|j_dSr)rOr rrstartTagNoFramesrr r rrr)r*rO s z3getPhases..AfterAfterFramesetPhase.__init__cSsdSr.r)rFr)r)r*r sz5getPhases..AfterAfterFramesetPhase.processEOFcSs|j||jjdSr.rrr)r)r*r sz9getPhases..AfterAfterFramesetPhase.processCommentcSs|jjd|SrrArr)r)r*r szAgetPhases..AfterAfterFramesetPhase.processSpaceCharacterscSs|jddS)Nrrrr)r)r*r sz.AfterAfterFramesetPhase.processCharacterscSs|jjd|Srrrr)r)r*r sz7getPhases..AfterAfterFramesetPhase.startTagHtmlcSs|jjd|Sr@rrr)r)r*r sz;getPhases..AfterAfterFramesetPhase.startTagNoFramescSs|jdd|didS)NrrDrrr)r)r*r  s z8getPhases..AfterAfterFramesetPhase.startTagOthercSs|jdd|didS)NrrDrrr)r)r*r s z8getPhases..AfterAfterFramesetPhase.processEndTagN) r=r>r?rOrrrrrrr rr)rr)r*AfterAfterFramesetPhase s r)r\r[rrr5r7rrrrrrrrrrrrrrrrr)r)rNr^rrrrr<rErJrr rr%r2r9rCrIrNrXrZrrrrrrr)rr*rLsr)#.g@CX!-GBbYLd's/9%&&rLcs>t|dt@}|r:tfdd|dD|d<dS)Nrc3s"|]\}}|||fVqdSr.)r)rCkvrr)r*r sz$adjust_attributes..)rrr/)rrneeds_adjustmentr)rr*r s   rr{FcCs|dkr i}t||||dS)N)r3rDrr)r)rDr3rrrr)r)r*r s  rc@seZdZdZdS)r~zError in parsed documentN)r=r>r?rr)r)r)r*r~ sr~)rT)r+rT)r{NF). __future__rrrZpip._vendor.sixrrr1 collectionsrrr r r Ztreebuilders.baser r constantsrrrrrrrrrrrrrrrrrr#r-rArwr"memoizerLrr Exceptionr~r)r)r)r*sP      H   G K