U o a$+@sddlmZmZmZddlmZddlmZddl m Z ddl m Z ddl m Z m Z ddl mZmZmZdd l mZmZdd l mZdd lmZdd lmZee ZGd ddeZdS))absolute_importdivisionunicode_literals)unichr)deque)spaceCharacters)entities) asciiLettersasciiUpper2Lower)digits hexDigitsEOF) tokenTypes tagTokenTypes)replacementCharacters)HTMLInputStream)TriecsdeZdZdZdfdd ZddZddZdd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zdd?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZdzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS) HTMLTokenizera  This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. Nc sFt|f||_||_d|_g|_|j|_d|_d|_t t | dS)NF) rstreamparser escapeFlag lastFourChars dataStatestateescape currentTokensuperr__init__)selfrrkwargs __class__`C:\Users\vtejo\AppData\Local\Temp\pip-unpacked-wheel-6mt8ur68\pip\_vendor\html5lib\_tokenizer.pyr"szHTMLTokenizer.__init__ccsPtg|_|rL|jjr6td|jjddVq|jr |jVq6q dS)z This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. ParseErrorrtypedataN)r tokenQueuerrerrorsrpoppopleftrr#r#r$__iter__1s  zHTMLTokenizer.__iter__c %Cst}d}|rt}d}g}|j}||krH|tk rH|||j}q"td||}|tkrt|}|j t ddd|idnbd|krd ksn|d krd }|j t ddd|idn d |krd ksnd|krdksnd|krdksnd|kr,dksn|t ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d g#kr|j t ddd|idz t |}Wn>t k r|d6}t d|d?Bt d7|d8@B}YnX|d9kr|j t dd:d;|j||S)r r rKr)rr+r@ entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr r r) r allowedChar fromAttributeoutputrHhex entityName entityLength tokenTyper#r#r$ consumeEntitys~               zHTMLTokenizer.consumeEntitycCs|j|dddS)zIThis method replaces the need for "entityInAttributeValueState". T)rZr[N)ra)rrZr#r#r$processEntityInAttributesz&HTMLTokenizer.processEntityInAttributecCs|j}|dtkrp|dt|d<|dtdkrp|drR|jtddd|drp|jtdd d|j||j|_d S) zThis method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. r'nameEndTagr(r%zattributes-in-end-tagr& selfClosingzself-closing-flag-on-end-tagN) rr translater rr)r>rr)rtokenr#r#r$emitCurrentTokens    zHTMLTokenizer.emitCurrentTokencCs|j}|dkr|j|_n|dkr.|j|_n|dkrd|jtddd|jtdddn`|tkrpdS|t kr|jtd ||j t d dn&|j d }|jtd||dd S) NrLrMr%invalid-codepointr&rTFrSTrLrMri) rr=entityDataStater tagOpenStater)r>rrr charsUntilrr(charsr#r#r$rs.          zHTMLTokenizer.dataStatecCs||j|_dSNT)rarrr-r#r#r$rlszHTMLTokenizer.entityDataStatecCs|j}|dkr|j|_n|dkr.|j|_n|tkr:dS|dkrp|jtddd|jtdd dnT|t kr|jtd ||j t d dn&|j d }|jtd||dd S) NrLrMFrir%rjr&rTr5rSTrk) rr=characterReferenceInRcdatarrcdataLessThanSignStaterr)r>rrrnror#r#r$ rcdataStates.          zHTMLTokenizer.rcdataStatecCs||j|_dSrq)rartrr-r#r#r$rr1sz(HTMLTokenizer.characterReferenceInRcdatacCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd S NrMrir%rjr&rTr5F)rMriT) rr=rawtextLessThanSignStaterr)r>rrrnror#r#r$ rawtextState6s"       zHTMLTokenizer.rawtextStatecCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd Sru) rr=scriptDataLessThanSignStaterr)r>rrrnror#r#r$scriptDataStateHs"       zHTMLTokenizer.scriptDataStatecCsr|j}|tkrdS|dkrL|jtddd|jtdddn"|jtd||jdddS) NFrir%rjr&rTr5T)rr=rr)r>rrnrr(r#r#r$plaintextStateZs     zHTMLTokenizer.plaintextStatecCs |j}|dkr|j|_n|dkr.|j|_n|tkrVtd|gddd|_|j|_n|dkr|j tddd |j td d d |j |_nt|d kr|j tdd d |j ||j |_n@|j tddd |j td dd |j ||j |_dS)N!/StartTagF)r'rcr(reselfClosingAcknowledged>r%z'expected-tag-name-but-got-right-bracketr&rTz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerMT)rr=markupDeclarationOpenStatercloseTagOpenStater rr tagNameStater)r>rrDbogusCommentStaterzr#r#r$rmis@           zHTMLTokenizer.tagOpenStatecCs|j}|tkr0td|gdd|_|j|_n|dkrX|jtddd|j |_nn|t kr|jtddd|jtd d d|j |_n0|jtdd d |id |j ||j |_dS)NrdFr'rcr(rerr%z*expected-closing-tag-but-got-right-bracketr&z expected-closing-tag-but-got-eofrTrrrDrrzr#r#r$rs2        zHTMLTokenizer.closeTagOpenStatecCs|j}|tkr|j|_n|dkr.|n~|tkrV|jt ddd|j |_nV|dkrh|j |_nD|dkr|jt ddd|j dd 7<n|j d|7<d S) Nrr%zeof-in-tag-namer&r}rirjrcr5T) rr=rbeforeAttributeNameStaterrhrr)r>rrselfClosingStartTagStaterrzr#r#r$rs&       zHTMLTokenizer.tagNameStatecCsP|j}|dkr"d|_|j|_n*|jtddd|j||j |_dSNr}r1rTrMr&T) rr=temporaryBufferrcdataEndTagOpenStaterr)r>rrDrtrzr#r#r$rss   z%HTMLTokenizer.rcdataLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j ||j |_dSNrTrr&T) rr=r rrcdataEndTagNameStaterr)r>rrDrtrzr#r#r$rs   z#HTMLTokenizer.rcdataEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j |_n||dkr|rtd|jgdd|_| |j |_nH|t kr|j|7_n0|j tdd|jd |j||j|_d S NrcrdFrr}rrTrr&T)rlowerrrr=rrrrrrhrr r)r>rDrtr appropriater(r#r#r$rs@         z#HTMLTokenizer.rcdataEndTagNameStatecCsP|j}|dkr"d|_|j|_n*|jtddd|j||j |_dSr) rr=rrawtextEndTagOpenStaterr)r>rrDrwrzr#r#r$rvs   z&HTMLTokenizer.rawtextLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j ||j |_dSr) rr=r rrawtextEndTagNameStaterr)r>rrDrwrzr#r#r$rs   z$HTMLTokenizer.rawtextEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j |_n||dkr|rtd|jgdd|_| |j |_nH|t kr|j|7_n0|j tdd|jd |j||j|_d Sr)rrrrr=rrrrrrhrr r)r>rDrwrr#r#r$rs@         z$HTMLTokenizer.rawtextEndTagNameStatecCsx|j}|dkr"d|_|j|_nR|dkrJ|jtddd|j|_n*|jtddd|j ||j |_dS) Nr}r1r|rTzrscriptDataEscapeStartStaterDryrzr#r#r$rxs    z)HTMLTokenizer.scriptDataLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j ||j |_dSr) rr=r rscriptDataEndTagNameStaterr)r>rrDryrzr#r#r$r,s   z'HTMLTokenizer.scriptDataEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j |_n||dkr|rtd|jgdd|_| |j |_nH|t kr|j|7_n0|j tdd|jd |j||j|_d Sr)rrrrr=rrrrrrhrr r)r>rDryrr#r#r$r7s@         z'HTMLTokenizer.scriptDataEndTagNameStatecCsJ|j}|dkr2|jtddd|j|_n|j||j|_dSN-rTr&T) rr=r)r>rscriptDataEscapeStartDashStaterrDryrzr#r#r$rSs   z(HTMLTokenizer.scriptDataEscapeStartStatecCsJ|j}|dkr2|jtddd|j|_n|j||j|_dSr) rr=r)r>rscriptDataEscapedDashDashStaterrDryrzr#r#r$r]s   z,HTMLTokenizer.scriptDataEscapeStartDashStatecCs|j}|dkr2|jtddd|j|_n|dkrD|j|_nn|dkrz|jtddd|jtdddn8|tkr|j |_n&|j d }|jtd||dd S) NrrTr&rMrir%rjr5)rMrriT) rr=r)r>rscriptDataEscapedDashStater"scriptDataEscapedLessThanSignStaterrrnror#r#r$scriptDataEscapedStategs(         z$HTMLTokenizer.scriptDataEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrD|j|_nn|dkr|jtddd|jtddd|j|_n0|t kr|j |_n|jtd|d|j|_d S) NrrTr&rMrir%rjr5T) rr=r)r>rrrrrrrrzr#r#r$r{s&       z(HTMLTokenizer.scriptDataEscapedDashStatecCs|j}|dkr*|jtdddn|dkr<|j|_n|dkrd|jtddd|j|_nn|dkr|jtddd|jtdd d|j|_n0|t kr|j |_n|jtd|d|j|_d S) NrrTr&rMrrir%rjr5T) rr=r)r>rrrryrrrrzr#r#r$rs*       z,HTMLTokenizer.scriptDataEscapedDashDashStatecCs|j}|dkr"d|_|j|_n\|tkrT|jtdd|d||_|j |_n*|jtddd|j ||j |_dSr) rr=r scriptDataEscapedEndTagOpenStaterr r)r>r scriptDataDoubleEscapeStartStaterDrrzr#r#r$rs    z0HTMLTokenizer.scriptDataEscapedLessThanSignStatecCsP|j}|tkr"||_|j|_n*|jtddd|j ||j |_dSr) rr=r r scriptDataEscapedEndTagNameStaterr)r>rrDrrzr#r#r$rs   z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j |_n||dkr|rtd|jgdd|_| |j |_nH|t kr|j|7_n0|j tdd|jd |j||j|_d Sr)rrrrr=rrrrrrhrr r)r>rDrrr#r#r$rs@         z.HTMLTokenizer.scriptDataEscapedEndTagNameStatecCs|j}|ttdBkrR|jtd|d|jdkrH|j |_ q|j |_ nB|t kr|jtd|d|j|7_n|j ||j |_ dSN)r}rrTr&scriptT)rr=rrAr)r>rrrscriptDataDoubleEscapedStaterrr rDrzr#r#r$rs    z.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_nt|dkr|jtddd|jtdddn>|tkr|jtdd d|j |_n|jtd|dd S NrrTr&rMrir%rjr5eof-in-script-in-scriptT) rr=r)r>r scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterrrzr#r#r$rs*        z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_n|dkr|jtddd|jtddd|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d Sr) rr=r)r>r$scriptDataDoubleEscapedDashDashStaterrrrrrzr#r#r$rs.        z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|j}|dkr*|jtdddn|dkrR|jtddd|j|_n|dkrz|jtddd|j|_n|dkr|jtddd|jtdd d|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d S) NrrTr&rMrrir%rjr5rT) rr=r)r>rrrryrrrrzr#r#r$rs2        z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|j}|dkr8|jtdddd|_|j|_n|j||j |_dS)Nr}rTr&r1T) rr=r)r>rrscriptDataDoubleEscapeEndStaterrDrrzr#r#r$r0s   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|j}|ttdBkrR|jtd|d|jdkrH|j |_ q|j |_ nB|t kr|jtd|d|j|7_n|j ||j |_ dSr)rr=rrAr)r>rrrrrrr rDrzr#r#r$r;s    z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|j}|tkr$|jtdn|tkrJ|jd|dg|j|_n|dkr\| n|dkrn|j |_n|dkr|j t ddd |jd|dg|j|_n|d kr|j t dd d |jdd dg|j|_nF|t kr|j t dd d |j|_n|jd|dg|j|_dS)NTr(r1rr})'"rRrMr%#invalid-character-in-attribute-namer&rirjr5z#expected-attribute-name-but-got-eof)rr=rrnr rr>attributeNameStaterrhrr)rrrrzr#r#r$rKs<           z&HTMLTokenizer.beforeAttributeNameStatecCs|j}d}d}|dkr&|j|_n.|tkr\|jddd||jtd7<d}n|dkrjd}n|tkr||j|_n|dkr|j |_n|d kr|j t d d d |jdddd 7<d}n|dkr |j t d dd |jddd|7<d}nH|t kr6|j t d dd |j|_n|jddd|7<d}|r|jdddt|jddd<|jdddD]>\}}|jddd|kr|j t d dd qҐq|r|dS)NTFrRr(rOrrr}rir%rjr&r5rrrMrzeof-in-attribute-namezduplicate-attribute)rr=beforeAttributeValueStaterr rrnrafterAttributeNameStaterr)r>rrrrfr rh)rr(leavingThisState emitTokenrc_r#r#r$ris^             z HTMLTokenizer.attributeNameStatecCsD|j}|tkr$|jtdn|dkr8|j|_n|dkrJ|n|tkrp|jd |dg|j |_n|dkr|j |_n|dkr|j t dd d |jd d dg|j |_n|d kr|j t dd d |jd |dg|j |_nF|tkr$|j t ddd |j|_n|jd |dg|j |_dS)NTrRrr(r1r}rir%rjr&r5rz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rr=rrnrrrhr rr>rrr)rrrrzr#r#r$rs@            z%HTMLTokenizer.afterAttributeNameStatecCsh|j}|tkr$|jtdn@|dkr8|j|_n,|dkrX|j|_|j|n |dkrj|j|_n|dkr|j t ddd| n|d kr|j t dd d|j d d d d7<|j|_n|dkr|j t ddd|j d d d |7<|j|_nL|tkrB|j t ddd|j|_n"|j d d d |7<|j|_dS)NTrrLrrr%z.expected-attribute-value-but-got-right-bracketr&rirjr(rOrr5)rRrM`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)rr=rrnattributeValueDoubleQuotedStaterattributeValueUnQuotedStaterDattributeValueSingleQuotedStater)r>rrhrrrrzr#r#r$rsF             z'HTMLTokenizer.beforeAttributeValueStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t kr|jtdd d|j |_n&|jddd ||j d 7<d S)NrrLrir%rjr&r(rOrr5z#eof-in-attribute-value-double-quote)rrLriT rr=afterAttributeValueStaterrbr)r>rrrrrnrzr#r#r$rs&       z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t kr|jtdd d|j |_n&|jddd ||j d 7<d S)NrrLrir%rjr&r(rOrr5z#eof-in-attribute-value-single-quote)rrLriTrrzr#r#r$rs&       z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|j}|tkr|j|_n|dkr0|dn|dkrB|n|dkr||jt ddd|j ddd |7<n|d kr|jt dd d|j ddd d 7<nV|t kr|jt dd d|j |_n.|j ddd ||j tdtB7<dS)NrLr)rrrRrMrr%z0unexpected-character-in-unquoted-attribute-valuer&r(rOrrirjr5z eof-in-attribute-value-no-quotes)rLrrrrRrMrriT)rr=rrrrbrhr)r>rrrrrnrArzr#r#r$rs4         z)HTMLTokenizer.attributeValueUnQuotedStatecCs|j}|tkr|j|_n|dkr.|np|dkr@|j|_n^|tkrt|j t ddd|j ||j |_n*|j t ddd|j ||j|_dS)Nrr}r%z$unexpected-EOF-after-attribute-valuer&z*unexpected-character-after-attribute-valueT) rr=rrrrhrrr)r>rrDrrzr#r#r$r s&         z&HTMLTokenizer.afterAttributeValueStatecCs|j}|dkr&d|jd<|n^|tkrZ|jtddd|j||j |_ n*|jtddd|j||j |_ dS)NrTrer%z#unexpected-EOF-after-solidus-in-tagr&z)unexpected-character-after-solidus-in-tag) rr=rrhrr)r>rrDrrrrzr#r#r$r4s         z&HTMLTokenizer.selfClosingStartTagStatecCsD|jd}|dd}|jtd|d|j|j|_dS)Nrrir5Commentr&T) rrnreplacer)r>rr=rrrzr#r#r$rFs    zHTMLTokenizer.bogusCommentStatecCs|jg}|ddkrR||j|ddkrPtddd|_|j|_dSn|ddkrd}dD](}||j|d|krfd }qqf|rtd ddddd |_|j|_dSn|dd krD|jdk rD|jj j rD|jj j dj |jj j krDd}d D].}||j|d|krd }q2q|rD|j |_dS|jtddd|rt|j|qZ|j|_dS)NrOrrr1r&T)dD))oOrICtTyYpPeEFDoctype)r'rcpublicIdsystemIdcorrect[)rrArrrr%zexpected-dashes-or-doctype)rr=r>rrcommentStartStater doctypeStatertree openElements namespacedefaultNamespacecdataSectionStater)rDr+r)rrHmatchedexpectedr#r#r$rUs\       z(HTMLTokenizer.markupDeclarationOpenStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nP|t kr|jtdd d|j|j|j|_n|jd|7<|j |_d S) Nrrir%rjr&r(r5rincorrect-commenteof-in-commentT) rr=commentStartDashStaterr)r>rrrr commentStaterzr#r#r$rs.       zHTMLTokenizer.commentStartStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nT|t kr|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrir%rjr&r(-�rrrT) rr=commentEndStaterr)r>rrrrrrzr#r#r$rs.       z#HTMLTokenizer.commentStartDashStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<nT|tkr|jtddd|j|j|j |_n|jd||j d 7<d S) Nrrir%rjr&r(r5r)rriT) rr=commentEndDashStaterr)r>rrrrrnrzr#r#r$rs$       zHTMLTokenizer.commentStatecCs|j}|dkr|j|_n|dkrV|jtddd|jdd7<|j|_nT|t kr|jtddd|j|j|j |_n|jdd|7<|j|_d S) Nrrir%rjr&r(rzeof-in-comment-end-dashT) rr=rrr)r>rrrrrrzr#r#r$rs$      z!HTMLTokenizer.commentEndDashStatecCs,|j}|dkr*|j|j|j|_n|dkrd|jtddd|jdd7<|j|_n|dkr|jtdd d|j |_n|d kr|jtdd d|jd|7<nj|t kr|jtdd d|j|j|j|_n4|jtdd d|jdd|7<|j|_dS)Nrrir%rjr&r(u--�r|z,unexpected-bang-after-double-dash-in-commentrz,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T) rr=r)r>rrrrrcommentEndBangStaterrzr#r#r$rs@          zHTMLTokenizer.commentEndStatecCs|j}|dkr*|j|j|j|_n|dkrN|jdd7<|j|_n|dkr|jtddd|jdd 7<|j |_nT|t kr|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrr(z--!rir%rjr&u--!�zeof-in-comment-end-bang-stateT) rr=r)r>rrrrrrrrzr#r#r$rs,       z!HTMLTokenizer.commentEndBangStatecCs|j}|tkr|j|_nj|tkr\|jtdddd|j d<|j|j |j |_n*|jtddd|j ||j|_dS)Nr%!expected-doctype-name-but-got-eofr&Frzneed-space-after-doctypeT) rr=rbeforeDoctypeNameStaterrr)r>rrrrDrzr#r#r$r s        zHTMLTokenizer.doctypeStatecCs|j}|tkrn|dkrT|jtdddd|jd<|j|j|j|_n|dkr|jtdddd |jd <|j |_nR|t kr|jtdd dd|jd<|j|j|j|_n||jd <|j |_d S) Nrr%z+expected-doctype-name-but-got-right-bracketr&Frrirjr5rcrT) rr=rr)r>rrrrdoctypeNameStaterrzr#r#r$rs4           z$HTMLTokenizer.beforeDoctypeNameStatecCs|j}|tkr2|jdt|jd<|j|_n|dkrh|jdt|jd<|j |j|j |_n|dkr|j t ddd|jdd7<|j |_nh|t kr|j t dddd |jd <|jdt|jd<|j |j|j |_n|jd|7<d S) Nrcrrir%rjr&r5zeof-in-doctype-nameFrT)rr=rrrfr afterDoctypeNameStaterr)r>rrrrrzr#r#r$r6s0        zHTMLTokenizer.doctypeNameStatecCsH|j}|tkrn.|dkr8|j|j|j|_n |tkrd|jd<|j ||jt ddd|j|j|j|_n|dkrd}d D]}|j}||krd}qq|r|j |_dSnD|d kr d}d D]}|j}||krd}qq|r |j |_dS|j ||jt dd d |idd|jd<|j |_dS)NrFrr%eof-in-doctyper&rT))uU)bB)lL)iIrsS)rrrr)mMz*expected-space-or-right-bracket-in-doctyper(r3)rr=rr)r>rrrrrDrafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)rr(rrr#r#r$rOsT            z#HTMLTokenizer.afterDoctypeNameStatecCs|j}|tkr|j|_n|dkrP|jtddd|j||j|_nT|t kr|jtdddd|j d<|j|j |j |_n|j||j|_dS N)rrr%unexpected-char-in-doctyper&rFrT) rr=r"beforeDoctypePublicIdentifierStaterr)r>rrDrrrrzr#r#r$rs&         z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|j}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t kr|jt dd dd |jd <|j|j|j |_n(|jt dd dd |jd <|j |_d S)Nrr1rrrr%unexpected-end-of-doctyper&FrrrT) rr=rr(doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater)r>rrrrrzr#r#r$rs:             z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t kr|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrir%rjr&rr5rrFrrT rr=!afterDoctypePublicIdentifierStaterr)r>rrrrrzr#r#r$rs0         z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t kr|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrir%rjr&rr5rrFrrTrrzr#r#r$rs0         z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs |j}|tkr|j|_n|dkr<|j|j|j|_n|dkrn|jt dddd|jd<|j |_n|dkr|jt dddd|jd<|j |_nh|t kr|jt dd dd |jd <|j|j|j|_n(|jt dddd |jd <|j |_d S) Nrrr%rr&r1rrrFrT)rr=r-betweenDoctypePublicAndSystemIdentifiersStaterr)r>rrr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterrrzr#r#r$rs>              z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|j}|tkrn|dkr4|j|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j |_nh|t kr|jt dddd |jd <|j|j|j|_n(|jt dd dd |jd <|j |_d S) Nrrr1rrr%rr&FrrT) rr=rr)r>rrrrrrrrrzr#r#r$rs2           z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|j}|tkr|j|_n|dkrP|jtddd|j||j|_nT|t kr|jtdddd|j d<|j|j |j |_n|j||j|_dSr) rr=r"beforeDoctypeSystemIdentifierStaterr)r>rrDrrrrzr#r#r$rs&         z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|j}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t kr|jt dd dd |jd <|j|j|j |_n(|jt dddd |jd <|j |_d S) Nrr1rrrr%rr&FrrT) rr=rrrrrr)r>rrrrrzr#r#r$r/s:             z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t kr|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrir%rjr&rr5rrFrrT rr=!afterDoctypeSystemIdentifierStaterr)r>rrrrrzr#r#r$rLs0         z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t kr|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrir%rjr&rr5rrFrrTrrzr#r#r$rds0         z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|j}|tkrn~|dkr4|j|j|j|_n^|tkrt|jt dddd|jd<|j|j|j|_n|jt ddd|j |_dS) Nrr%rr&FrrT) rr=rr)r>rrrrrrrzr#r#r$r|s$      z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|j}|dkr*|j|j|j|_n,|tkrV|j||j|j|j|_ndS)NrT) rr=r)r>rrrrrDrzr#r#r$rs    zHTMLTokenizer.bogusDoctypeStatecCsg}||jd||jd|j}|tkr>qq|dksJt|ddddkrv|ddd|d<qq||qd|}|d}|dkrt|D]}|j t d d d q| dd }|r|j t d |d |j |_ dS)N]rrOz]]r1rirr%rjr&r5rTT)r>rrnr=rAssertionErrorr@countranger)rrrr)rr(r= nullCountrr#r#r$rs2          zHTMLTokenizer.cdataSectionState)N)NF)N__name__ __module__ __qualname____doc__rr.rKrarbrhrrlrtrrrwryr{rmrrrsrrrvrrrxrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr __classcell__r#r#r!r$rs H P#         6 "-3rN) __future__rrrZpip._vendor.sixrrB collectionsr constantsrr r r r r rrrr _inputstreamr_trierrUobjectrr#r#r#r$s