
    Mf                        d Z dZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZmZ eZ	 ddlmZ dZn# e$ r d	ZY nw xY w	 ddlZdZn# e$ r d	ZY nw xY w	 ddlZdZ ej!        j"        Z"n# e$ r d	Z d
Z"Y nw xY wdZ# G d de$          Z% G d de$          Z& G d de&          Z' G d de&          Z( G d de'          Z)dS )zHelper classes for tests.MIT    N)BeautifulSoup)	CharsetMetaAttributeValueCommentContentMetaAttributeValueDoctypePYTHON_SPECIFIC_ENCODINGSSoupStrainerScript
StylesheetTag)DetectsXMLParsedAsHTMLHTMLParserTreeBuilderXMLParsedAsHTMLWarning)SelectorSyntaxErrorTF)r   u-
  A bare string
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
<div><![CDATA[A CDATA section where it doesn't belong]]></div>
<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div>
<div>A <meta> tag</div>
<div>A <br> tag that supposedly has contents.</br></div>
<div>AT&T</div>
<div><textarea>Within a textarea, markup like <b> tags and <&<&amp; should be treated as literal</textarea></div>
<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div>
<div>This numeric entity is missing the final semicolon: <x t="pi&#241ata"></div>
<div><a href="http://example.com/</a> that attribute value never got closed</div>
<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div>
<! This document starts with a bogus declaration ><div>a</div>
<div>This document contains <!an incomplete declaration <div>(do you see it?)</div>
<div>This document ends with <!an incomplete declaration
<div><a style={height:21px;}>That attribute value was bogus</a></div>
<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace
<div><table><td nowrap>That boolean attribute had no value</td></table></div>
<div>Here's a nonexistent entity: &#foo; (do you see it?)</div>
<div>This document ends before the entity finishes: &gt
<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p>
<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b>
<div><table><tr><td>Here's a table</td></tr></table></div>
<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div>
<div>This tag contains nothing but whitespace: <b>    </b></div>
<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div>
<div><table><div>This table contains bare markup</div></table></div>
<div><div id="1">
 <a href="link1">This link is never closed.
</div>
<div id="2">
 <div id="3">
   <a href="link2">This link is closed.</a>
  </div>
</div></div>
<div>This document contains a <!DOCTYPE surprise>surprise doctype</div>
<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div>
<div><our☃>Tag name contains Unicode characters</our☃></div>
<div><a ☃="snowman">Attribute name contains Unicode characters</a></div>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
c                   V    e Zd Zed             Zd Zd ZddZeZd Z	ddZ
d	 Zd
 ZdS )SoupTestc                     t           S N)default_builderselfs    Q/var/www/surfInsights/venv3-11/lib/python3.11/site-packages/bs4/tests/__init__.pyr   zSoupTest.default_builder^   s        c                 V    |                     d| j                  }t          |fd|i|S )z*Build a Beautiful Soup object from markup.builder)popr   r   )r   markupkwargsr   s       r   soupzSoupTest.soupb   s3    **Y(<==V??W????r   c                 B     | j         di |                    |          S )z[Turn an HTML fragment into a document.

        The details depend on the builder.
         )r   test_fragment_to_document)r   r   r   s      r   document_forzSoupTest.document_forg   s+    
 $t#--f--GGOOOr   Nc                 P   | j         }t          ||          }||}|                                |                     |          k    sJ t	          d t          |j                                                  D                       sJ |j        gd |j	        D             k    sJ dS )ziParse some markup using Beautiful Soup and verify that
        the output markup is as expected.
        )r   Nc              3   "   K   | ]
}|d k    V  dS )r   Nr"   ).0vs     r   	<genexpr>z'SoupTest.assert_soup.<locals>.<genexpr>   s&      EEA1a4EEEEEEr   c                     g | ]	}|j         
S r"   )name)r'   xs     r   
<listcomp>z(SoupTest.assert_soup.<locals>.<listcomp>   s    &D&D&D!qv&D&D&Dr   )
r   r   decoder$   alllistopen_tag_countervaluesROOT_TAG_NAMEtagStack)r   to_parsecompare_parsed_tor   objs        r   assert_soupzSoupTest.assert_soupn   s     &Hg666$ ( zz||t001BCCCCCC EEc&:&A&A&C&C!D!DEEEEEEEE !"&D&Ds|&D&D&DDDDDDDr   c                 Z    d}|j         D ] }|r||j        k    sJ ||j        k    sJ |}!dS )zyEnsure that next_element and previous_element are properly
        set for all descendants of the given element.
        N)descendantsnext_elementprevious_element)r   elementearlieres       r   assertConnectednesszSoupTest.assertConnectedness   s\     $ 	 	A 5G00000!"44444GG		 	r   Fc           	         d}|j         |j        $J d                    ||j        d                      |j        $J d                    ||j        d                      |j        $J d                    ||j        d                      d}d}d}t          |j                  dz
  }|j        D ]:}d}|dk    r|j         |j        |u s$J d                    ||j        |                      |j        |u s$J d                    ||j        |                      |j        $J d                    ||j        d                      n|j        |j        |dz
           u s2J d	                    ||j        |j        |dz
                                 |j        |dz
           j        |u s@J d
                    |j        |dz
           |j        |dz
           j        |                      |e|j        |u s/J d                    ||j        ||j         j                              |j        |u s$J d                    ||j        |                      t          |t                    rH|j        rA| 
                    |d          }|j        $J d
                    ||j        d                      ||}n|}||k    r+|j        $J d
                    ||j        d                      |dz  }<||n|}||}|s||z|}	 |,|j        $J d                    ||j        d                      nG|j        8|j        |j        u s)J d                    ||j        |j                              n|j         }udS |S )z.Ensure proper linkage throughout the document.Nz3Bad previous_element
NODE: {}
PREV: {}
EXPECTED: {}z3Bad previous_sibling
NODE: {}
PREV: {}
EXPECTED: {}z/Bad next_sibling
NODE: {}
NEXT: {}
EXPECTED: {}r      z/Bad next_element
NODE: {}
NEXT: {}
EXPECTED: {}z2Bad previous_sibling
NODE: {}
PREV {}
EXPECTED: {}z1Bad previous_sibling
NODE: {}
PREV {}
EXPECTED {}z-Bad next_sibling
NODE: {}
NEXT {}
EXPECTED {}z=Bad previous_element
NODE: {}
PREV {}
EXPECTED {}
CONTENTS {}z-Bad next_element
NODE: {}
NEXT {}
EXPECTED {}T)parentr<   formatprevious_siblingnext_siblinglencontentsr;   
isinstancer   linkage_validator)	r   el_recursive_call
descendantidxchild
last_childlast_idxtargets	            r   rJ   zSoupTest.linkage_validator   su   
 9&..HOO+T  /.. &..HOO+T  /.. ?**DKK  +**
 
r{##a'[ >	 >	EJ axx9(?e333KRR  433 !1R777OVV!5#92  877 !199NUU!5#94  :99 -S1W1EEEEJQQu5r{377K  FEE {37+8EAAAFMMC!G,bk#'.B.OQV  BAA
 ) 1Z???[bb!5#9:u|G\  @?? &2e;;;JQQ&
(?  <;;
 %%% %. !33E4@@
!.66FMM"J$;T  766 %'

"
 h)11FMMu14  211 1HCC(4

%=E 	5#4F'> -55JQQ!5#5t  655 (4 -1DDDDJQQ!5#5v7J  EDD '  4 Lr   c                 *    d |D             |k    sJ dS )zMake sure that the given tags have the correct text.

        This is used in tests that define a bunch of tags, each
        containing a single string, and then select certain strings by
        some mechanism.
        c                     g | ]	}|j         
S r"   stringr'   tags     r   r-   z+SoupTest.assert_selects.<locals>.<listcomp>  s    +++s
+++r   Nr"   r   tagsshould_matchs      r   assert_selectszSoupTest.assert_selects  s)     ,+d+++|;;;;;;r   c                 *    d |D             |k    sJ dS )zMake sure that the given tags have the correct IDs.

        This is used in tests that define a bunch of tags, each
        containing a single string, and then select certain strings by
        some mechanism.
        c                     g | ]
}|d          S idr"   rW   s     r   r-   z/SoupTest.assert_selects_ids.<locals>.<listcomp>  s    ***cD	***r   Nr"   rY   s      r   assert_selects_idszSoupTest.assert_selects_ids  s)     +*T***l::::::r   r   )F)__name__
__module____qualname__propertyr   r    r$   r8   assertSoupEqualsr@   rJ   r\   ra   r"   r   r   r   r   \   s          X@ @ @
P P PE E E E. #	 	 	p p p pd< < <; ; ; ; ;r   r   c                       e Zd Zej                            ddi  edg          ddgig          d             Zej                            d edg          ddgig          d	             Zd
 Z	dS )TreeBuilderSmokeTestmulti_valued_attributesNclass)b*notclassc                 ^    d}|                      ||          }|j        d         dk    sJ d S )NzC<html xmlns="http://www.w3.org/1999/xhtml"><a class="a b c"></html>ri   rj   za b cr    ar   ri   r   r    s       r   test_attribute_not_multi_valuedz4TreeBuilderSmokeTest.test_attribute_not_multi_valued  s=    
 Wyy9PyQQvg'))))))r   )rq   c                 b    d}|                      ||          }|j        d         g dk    sJ d S )Nz<a class="a b c">ro   rj   )rq   rk   crp   rr   s       r   test_attribute_multi_valuedz0TreeBuilderSmokeTest.test_attribute_multi_valued#  sJ     %yy,C  
 
 vg///111111r   c                 8    d}d}|                      |          }d S )Nz<![if word]>content<![endif]>z<!DOCTYPE html]ff>)r    r   r   r    s      r   test_invalid_doctypez)TreeBuilderSmokeTest.test_invalid_doctype-  s!    0%yy  r   )
rb   rc   rd   pytestmarkparametrizedictrs   rv   ry   r"   r   r   rh   rh     s         [!	r447)$$$sZL&9: * *	 *
 [!DDG9$5$5$5gY7G#H 2 2 2! ! ! ! !r   rh   c                      e Zd ZdZd Zd Zd Zd ZdDdZd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)d) Z*d* Zd+ Z+d, Z,d- Z-d. Z.d/ Z/d0 Z0d1 Z1d2 Z2d3 Z3d4 Z4d5 Z5d6 Z6d7 Z7d8 Z8d9 Z9d: Z:d; Z;d< Z<d= Z=d> Z>d? Z?d@ Z@dA ZAdB ZBdCS )EHTMLTreeBuilderSmokeTestaC  A basic test of a treebuilder's competence.

    Any HTML treebuilder, present or future, should be able to pass
    these tests. With invalid markup, there's room for interpretation,
    and different parsers can handle it differently. But with the
    markup in these tests, there's not much room for interpretation.
    c                 ~    dD ]9}|                      d          }|                    |          }|j        dk    sJ :dS )zmVerify that all HTML4 and HTML5 empty element (aka void element) tags
        are handled correctly.
        )areabasebrcolembedhrimginputkeygenlinkmenuitemmetaparamsourcetrackwbrspacerframe TN)r    new_tagis_empty_element)r   r+   r    r   s       r   test_empty_element_tagsz0HTMLTreeBuilderSmokeTest.test_empty_element_tags<  sV    
 	4 	4D 99R==Dll4((G+t33333	4 	4r   c                    |                      d          }t          |j        j        t                    sJ t          |j        j        t                    sJ |                      d          }t          |j        j        t                    sJ |j        j        dk    sJ t          |j        j        t                    sJ d S )Nz7<style>Some CSS</style><script>Some Javascript</script>z<style><!--Some CSS--></style>z<!--Some CSS-->)r    rI   stylerV   r   scriptr   r   r    s     r   test_special_string_containersz7HTMLTreeBuilderSmokeTest.test_special_string_containersH  s    yyE
 
 $*+Z88888$+,f55555yy,
 
 $*+Z88888 z $55555$*+Z8888888r   c                     |                      d          }t          j        |d          }t          j        |          }|j        t
          k    sJ |                                |                                k    sJ d S Nz<a><b>foo</a>   r    pickledumpsloads	__class__r   r.   r   treedumpedloadeds       r   !test_pickle_and_unpickle_identityz:HTMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identityX  p     yy))dA&&f%%=0000}}$++--//////r   c                    |                      |          \  }}|j        d         }|j        t          k    sJ ||k    sJ |                    d          dt          |                   |k    sJ |j        j        d         dk    sJ dS )z8Assert that a given doctype string is handled correctly.r   utf8Nfoo_document_with_doctyperH   r   r   encoderG   pr   doctype_fragmentdoctype_strr    doctypes        r   assertDoctypeHandledz-HTMLTreeBuilderSmokeTest.assertDoctypeHandleda  s     778HIIT -" G++++*****{{6""#4C$4$4#45DDDD vq!U******r   DOCTYPEc                 v    d|d|d}|dz   }|                      |          }|                    d          |fS )z5Generate and parse a document with the given doctype.z<! >z
<p>foo</p>r   r    r   )r   r   doctype_stringr   r   r    s         r   r   z/HTMLTreeBuilderSmokeTest._document_with_doctypeo  sK      .0@0@0@A>)yy  ~~f%%t++r   c                 Z    |                      d           |                      d           dS )z?Make sure normal, everyday HTML doctypes are handled correctly.htmlz4html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"Nr   r   s    r   test_normal_doctypesz-HTMLTreeBuilderSmokeTest.test_normal_doctypesv  s@    !!&)))!!B	D 	D 	D 	D 	Dr   c                 ~    |                      d          }|j        d         }d|                                k    sJ d S )Nz
<!DOCTYPE>r   r   )r    rH   strip)r   r    r   s      r   test_empty_doctypez+HTMLTreeBuilderSmokeTest.test_empty_doctype|  s>    yy&&-"W]]__$$$$$$r   c                     dD ]}|                      d|          \  }}|j        d         }|j        t          k    sJ |dk    sJ |                    d          d t          |                   dk    sJ |j        j        d         dk    sJ d S )N)r   DocTyper   r   r   s   <!DOCTYPE html>r   r   r   s        r   test_mixed_case_doctypez0HTMLTreeBuilderSmokeTest.test_mixed_case_doctype  s     6 	/ 	/ $ ; ;(! !K mA&G$////f$$$$;;v&&'8K(8(8'89=OOOOO 6?1%.....	/ 	/r   c                 4    d}|                      |           d S )Nznhtml PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"r   )r   r   s     r   test_public_doctype_with_urlz5HTMLTreeBuilderSmokeTest.test_public_doctype_with_url  s$     C!!'*****r   c                 0    |                      d           d S )Nz$foo SYSTEM "http://www.example.com/"r   r   s    r   test_system_doctypez,HTMLTreeBuilderSmokeTest.test_system_doctype  s    !!"HIIIIIr   c                 0    |                      d           d S )Nz#xsl:stylesheet SYSTEM "htmlent.dtd"r   r   s    r   test_namespaced_system_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_system_doctype      !!"GHHHHHr   c                 0    |                      d           d S )Nz#xsl:stylesheet PUBLIC "htmlent.dtd"r   r   s    r   test_namespaced_public_doctypez7HTMLTreeBuilderSmokeTest.test_namespaced_public_doctype  r   r   c                 $   d}t          j        d          5 }|                     |          }ddd           n# 1 swxY w Y   |                    d                              dd          |                    dd          k    sJ |g k    sJ dS )zJA real XHTML document should come out more or less the same as it went in.   <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>TrecordNutf-8   
r   )warningscatch_warningsr    r   replacer   r   wr    s       r   test_real_xhtml_documentz1HTMLTreeBuilderSmokeTest.test_real_xhtml_document  s     $D111 	%Q99V$$D	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%{{7##++E3776>>%QT;U;UUUUU Bwwwwww   :>>c                     d}t          j        d          5 }|                     |          }d d d            n# 1 swxY w Y   dt          |                    d                    k    sJ g |k    sJ d S )Ns.   <ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>Tr   r   zns1:foo)r   r   r    rG   find_allr   s       r   test_namespaced_htmlz-HTMLTreeBuilderSmokeTest.test_namespaced_html  s     G$D111 	%Q99V$$D	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% Ci00111111 Qwwwwwwr   c                 <   d}t          j        d          5 }|                     |          }|j        j        dk    sJ 	 d d d            n# 1 swxY w Y   |\  }t          |j        t                    sJ t          |j                  t          j	        k    sJ d S )Ns7   <?xml version="1.0" encoding="utf-8"?><tag>string</tag>Tr   rV   )
r   r   r    rX   rV   rI   messager   strMESSAGE)r   r   r   r    warnings        r   test_detect_xml_parsed_as_htmlz7HTMLTreeBuilderSmokeTest.test_detect_xml_parsed_as_html  s     P$D111 	/Q99V$$D8?h.....	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	'/+ABBBBB7?##'='EEEEEEEs   (AAAc                     d}|                      |          }||                                k    sJ d}|                      |          }||                    d          k    sJ d S )Nz<?PITarget PIContent?>s   <?PITarget PIContent?>r   )r    r.   r   rx   s      r   test_processing_instructionz4HTMLTreeBuilderSmokeTest.test_processing_instruction  sl    
 .yy  &&&&.yy  V,,,,,,,,r   c                 8    t          j        | j                   dS )zMake sure you can copy the tree builder.

        This is important because the builder is part of a
        BeautifulSoup object, and we want to be able to copy that.
        N)copydeepcopyr   r   s    r   test_deepcopyz&HTMLTreeBuilderSmokeTest.test_deepcopy  s     	d*+++++r   c                     |                      d          }|j        j        rJ t          |j                  dk    sJ dS )zA <p> tag is never designated as an empty-element tag.

        Even if the markup shows it as an empty-element tag, it
        shouldn't be presented that way.
        <p/><p></p>N)r    r   r   r   r   s     r   !test_p_tag_is_never_empty_elementz:HTMLTreeBuilderSmokeTest.test_p_tag_is_never_empty_element  sE     yy  6****46{{i''''''r   c                     |                      dd           |                      dd           |                      dd           dS )zA tag that's not closed by the end of the document should be closed.

        This applies to all tags except empty-element tags.
        <p>r   z<b>z<b></b>z<br><br/>Nr8   r   s    r   test_unclosed_tags_get_closedz6HTMLTreeBuilderSmokeTest.test_unclosed_tags_get_closed  sL    
 		***	***)))))r   c                     |                      d          }|j        j        sJ t          |j                  dk    sJ dS )zA <br> tag is designated as an empty-element tag.

        Some parsers treat <br></br> as one <br/> tag, some parsers as
        two tags, but it should always be an empty-element tag.
        z	<br></br>r   N)r    r   r   r   r   s     r   #test_br_is_always_empty_element_tagz<HTMLTreeBuilderSmokeTest.test_br_is_always_empty_element_tag  sE     yy%%w''''47||w&&&&&&r   c                 0    |                      d           d S )Nz<em><em></em></em>r   r   s    r   test_nested_formatting_elementsz8HTMLTreeBuilderSmokeTest.test_nested_formatting_elements  s    -.....r   c                 v    d}|                      |          }d|                    d          d         k    sJ d S )Nz<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
ztext/javascriptr   type)r    find)r   r   r    s      r   test_double_headz)HTMLTreeBuilderSmokeTest.test_double_head  sB     yy DIIh$7$7$???????r   c                 :   d}|                      |           |                     |          }|                    d          }|j        t          k    sJ |                    d          }||j        k    sJ |                    d          }||j        k    sJ d S )Nz<p>foo<!--foobar-->baz</p>foobarrU   r   baz)r8   r    r   r   r   r;   r<   )r   r   r    commentr   r   s         r   test_commentz%HTMLTreeBuilderSmokeTest.test_comment  s    -   yy  ))8),, G++++ iiui%%#*****iiui%%#.......r   c                    d}d}|                      |           |                      |           |                     |          }|j                                        |k    sJ |                     |          }|j                                        |k    sJ |                     d          }|j                                        dk    sJ dS )zWhitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        z<pre>a   z</pre>
z <textarea> woo
woo  </textarea>
z<textarea></textarea>z<textarea></textarea>
N)r8   r    preprettifytextarea)r   
pre_markuptextarea_markupr    s       r   -test_preserved_whitespace_in_pre_and_textareazFHTMLTreeBuilderSmokeTest.test_preserved_whitespace_in_pre_and_textarea&  s     *
>$$$)))yy$$x  ""j0000yy))}%%''?::::yy011}%%''+DDDDDDDr   c                     d}|                      |           d}|                      |           d}|                      |           dS )z+Inline elements can be nested indefinitely.z<b>Inside a B tag</b>z!<p>A <i>nested <b>tag</b></i></p>z/<p>A <a>doubly <i>nested <b>tag</b></i></a></p>Nr   )r   b_tagnested_b_tagdouble_nested_b_tags       r   test_nested_inline_elementsz4HTMLTreeBuilderSmokeTest.test_nested_inline_elements8  sT    ':&&&O&&&&&r   c                     |                      d          }|j        }|j        j        j        dk    sJ |j        j        dk    sJ dS )zBlock elements can be nested.z*<blockquote><p><b>Foo</b></p></blockquote>FooN)r    
blockquoter   rk   rV   )r   r    r  s      r    test_nested_block_level_elementsz9HTMLTreeBuilderSmokeTest.test_nested_block_level_elementsC  sO    yyEFF_
|~$----|"e++++++r   c                 `    d}|                      |d           |                      d           dS )z$One table can go inside another one.z[<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td>zh<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td></tr></table>z{<table><thead><tr><td>Foo</td></tr></thead><tbody><tr><td>Bar</td></tr></tbody><tfoot><tr><td>Baz</td></tr></tfoot></table>Nr   )r   r   s     r   test_correctly_nested_tablesz5HTMLTreeBuilderSmokeTest.test_correctly_nested_tablesJ  sT    " 	!	" 	" 	" 	;	< 	< 	< 	< 	<r   c                     d}|                      |          }ddg|j        d         k    sJ |j        |                    dd          k    sJ d S )Nz<div class=" foo bar	 "></a>r   barrj   divzfoo bar)class_)r    r  r   rx   s      r   *test_multivalued_attribute_with_whitespacezCHTMLTreeBuilderSmokeTest.test_multivalued_attribute_with_whitespace^  sa     0yy  u~'!22222 x499U99========r   c                 f    d}|                      |          }dg|j        j        d         k    sJ d S )Nz1<table><div><div class="css"></div></div></table>cssrj   )r    r  rx   s      r   (test_deeply_nested_multivalued_attributezAHTMLTreeBuilderSmokeTest.test_deeply_nested_multivalued_attributej  s=     Eyy  w$(,w///////r   c                 ^    d}|                      |          }ddg|j        d         k    sJ d S )Nz<html class="a b"></html>rq   rk   rj   )r    r   rx   s      r   "test_multivalued_attribute_on_htmlz;HTMLTreeBuilderSmokeTest.test_multivalued_attribute_on_htmlr  s<     -yy  SzTYw///////r   c                 2    |                      dd           d S )Nz<a b="<a>"></a>z<a b="&lt;a&gt;"></a>r   r   s    r   3test_angle_brackets_in_attribute_values_are_escapedzLHTMLTreeBuilderSmokeTest.test_angle_brackets_in_attribute_values_are_escapedz  s     *,CDDDDDr   c                 2    |                      dd           d S )Nz$<p>&bull; AT&T is in the s&p 500</p>u)   <p>• AT&amp;T is in the s&amp;p 500</p>r   r   s    r   3test_strings_resembling_character_entity_referenceszLHTMLTreeBuilderSmokeTest.test_strings_resembling_character_entity_references}  s,     	2:	
 	
 	
 	
 	
r   c                 2    |                      dd           d S )Nz<p>Bob&apos;s Bar</p>z<p>Bob's Bar</p>r   r   s    r   test_apos_entityz)HTMLTreeBuilderSmokeTest.test_apos_entity  s*    #	
 	
 	
 	
 	
r   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nz%<p>&#147;Hello&#148; &#45;&#9731;</p>u   “Hello” -☃r    r   rV   rx   s      r   *test_entities_in_foreign_document_encodingzCHTMLTreeBuilderSmokeTest.test_entities_in_foreign_document_encoding  s5     9yy  !TV]222222r   c                     d}|                      d|           |                      d|           |                      d|           |                      d|           d S )Nu   <p id="piñata"></p>z<p id="pi&#241;ata"></p>z<p id="pi&#xf1;ata"></p>z<p id="pi&#Xf1;ata"></p>z<p id="pi&ntilde;ata"></p>r   r   expects     r   0test_entities_in_attributes_converted_to_unicodezIHTMLTreeBuilderSmokeTest.test_entities_in_attributes_converted_to_unicode  si    H3V<<<3V<<<3V<<<5v>>>>>r   c                     d}|                      d|           |                      d|           |                      d|           |                      d|           d S )Nu   <p>piñata</p>z<p>pi&#241;ata</p>z<p>pi&#xf1;ata</p>z<p>pi&#Xf1;ata</p>z<p>pi&ntilde;ata</p>r   r'  s     r   *test_entities_in_text_converted_to_unicodezCHTMLTreeBuilderSmokeTest.test_entities_in_text_converted_to_unicode  si    B-v666-v666-v666/88888r   c                 2    |                      dd           d S )Nz#<p>I said &quot;good day!&quot;</p>z<p>I said "good day!"</p>r   r   s    r   ,test_quot_entity_converted_to_quotation_markzEHTMLTreeBuilderSmokeTest.test_quot_entity_converted_to_quotation_mark  s*    >9	; 	; 	; 	; 	;r   c                     d}|                      d|           |                      d|           |                      d|           d S )Nu   �z&#10000000000000;z&#x10000000000000;z&#1000000000;r   r'  s     r   test_out_of_range_entityz1HTMLTreeBuilderSmokeTest.test_out_of_range_entity  sQ    ,,f555-v666&11111r   c                     |                      d          }d|j        j        j        j        k    sJ d|j        j        k    sJ |                     |           dS )zDMostly to prevent a recurrence of a bug in the html5lib treebuilder.z!<html><h2>
foo</h2><p></p></html>r   N)r    h2rV   r;   r+   r   r@   r   s     r   test_multipart_stringsz/HTMLTreeBuilderSmokeTest.test_multipart_strings  s^    yy=>>dgn166666dfk!!!!  &&&&&r   c                 ^    |                      dd           |                      dd           dS )zqVerify consistent handling of empty-element tags,
        no matter how they come in through the markup.
        z<br/><br/><br/>z<br /><br /><br />Nr   r   s    r   r   z0HTMLTreeBuilderSmokeTest.test_empty_element_tags  s:     	*,=>>>-/@AAAAAr   c                 z    d}|                      |          }|j        j        J |                     |           dS )8Prevent recurrence of a bug in the html5lib treebuilder.z?<html><head></head>
  <link></link>
  <body>foo</body>
</html>
N)r    r   bodyr@   r   contentr    s      r   #test_head_tag_between_head_and_bodyz<HTMLTreeBuilderSmokeTest.test_head_tag_between_head_and_body  sF    
 yy!!y~)))  &&&&&r   c                 h    d}|                      |          }|                     |j                   dS )r5  z<!DOCTYPE html>
<html>
 <body>
   <article id="a" >
   <div><a href="1"></div>
   <footer>
     <a href="2"></a>
   </footer>
  </article>
  </body>
</html>
N)r    r@   articler7  s      r   test_multiple_copies_of_a_tagz6HTMLTreeBuilderSmokeTest.test_multiple_copies_of_a_tag  s8     yy!!  .....r   c                     d}|                      |          }||                                k    sJ |j        }d|j        d         k    sJ d|j        d         k    sJ d|j        d         k    sJ dS )	zParsers don't need to *understand* namespaces, but at the
        very least they should not choke on namespaces or lose
        data.s   <html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>http://www.w3.org/1999/xhtmlxmlns"http://www.w3.org/1998/Math/MathMLzxmlns:mathmlhttp://www.w3.org/2000/svgz	xmlns:svgN)r    r   r   )r   r   r    r   s       r   test_basic_namespacesz.HTMLTreeBuilderSmokeTest.test_basic_namespaces  s    
 nyy  &&&&y-71CCCCC3ty7PPPPP+ty/EEEEEEEr   c                 ^    d}|                      |          }ddg|j        d         k    sJ d S )Ns   <a class="foo bar">r   r  rj   rp   rx   s      r   -test_multivalued_attribute_value_becomes_listzFHTMLTreeBuilderSmokeTest.test_multivalued_attribute_value_becomes_list  s9    'yy  u~000000r   c                 X    d}|                      |          }d|j        j        k    sJ d S )NuD   <html><head><meta encoding="euc-jp"></head><body>Sacré bleu!</body>   Sacré bleu!)r    r6  rV   rx   s      r   test_can_parse_unicode_documentz8HTMLTreeBuilderSmokeTest.test_can_parse_unicode_document  s7     yyy  49#3333333r   c                     t          d          }|                     d|          }|                                dk    sJ dS )z2Parsers should be able to work with SoupStrainers.rk   z&A <b>bold</b> <meta/> <i>statement</i>)
parse_onlyz<b>bold</b>N)r
   r    r.   )r   strainerr    s      r   test_soupstrainerz*HTMLTreeBuilderSmokeTest.test_soupstrainer  sL    $$yyA$,  . .{{}}------r   c                 2    |                      dd           d S )Nz<foo attr='bar'></foo>z<foo attr="bar"></foo>r   r   s    r   7test_single_quote_attribute_values_become_double_quoteszPHTMLTreeBuilderSmokeTest.test_single_quote_attribute_values_become_double_quotes  s*    16	8 	8 	8 	8 	8r   c                 4    d}|                      |           d S )N'<foo attr='bar "brawls" happen'>a</foo>r   )r   texts     r   7test_attribute_values_with_nested_quotes_are_left_alonezPHTMLTreeBuilderSmokeTest.test_attribute_values_with_nested_quotes_are_left_alone  s!    <r   c                     d}|                      |          }d|j        d<   |                     |j                                        d           d S )NrO  zBrawls happen at "Bob's Bar"attrz:<foo attr="Brawls happen at &quot;Bob's Bar&quot;">a</foo>)r    r   r8   r.   )r   rP  r    s      r   :test_attribute_values_with_double_nested_quotes_get_quotedzSHTMLTreeBuilderSmokeTest.test_attribute_values_with_double_nested_quotes_get_quoted	  s[    <yy:HOOM	O 	O 	O 	O 	Or   c                 ^    |                      dd           |                      dd           d S )Nz+<this is="really messed up & stuff"></this>z/<this is="really messed up &amp; stuff"></this>z.<a href="http://example.org?a=1&b=2;3">foo</a>z2<a href="http://example.org?a=1&amp;b=2;3">foo</a>r   r   s    r   .test_ampersand_in_attribute_value_gets_escapedzGHTMLTreeBuilderSmokeTest.test_ampersand_in_attribute_value_gets_escaped  sR    FO	Q 	Q 	Q 	<@	B 	B 	B 	B 	Br   c                 0    |                      d           d S )Nz/<a href="http://example.org?a=1&amp;b=2;3"></a>r   r   s    r   7test_escaped_ampersand_in_attribute_value_is_left_alonezPHTMLTreeBuilderSmokeTest.test_escaped_ampersand_in_attribute_value_is_left_alone  s    JKKKKKr   c                 :    d}d}|                      ||           d S )N-<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>#   <p>&lt;&lt;sacré bleu!&gt;&gt;</p>r   )r   rP  expecteds      r   1test_entities_in_strings_converted_during_parsingzJHTMLTreeBuilderSmokeTest.test_entities_in_strings_converted_during_parsing  s*     ?Yx(((((r   c                 X    d}|                      |          }|j        j        dk    sJ d S )Ns   <p>Foo</p>u	   ‘Foo’r$  )r   quoter    s      r   )test_smart_quotes_converted_on_the_way_inzBHTMLTreeBuilderSmokeTest.test_smart_quotes_converted_on_the_way_in#  s6     &yyv} bbbbbbbr   c                 T    |                      d          }|j        j        dk    sJ d S )Nz<a>&nbsp;&nbsp;</a>u     )r    rq   rV   r   s     r   0test_non_breaking_spaces_converted_on_the_way_inzIHTMLTreeBuilderSmokeTest.test_non_breaking_spaces_converted_on_the_way_in*  s0    yy.//v} 8888888r   c                     d}d                     d          }|                     |          }|j                             d          |k    sJ d S )NrZ  r[  r   )r   r    r   )r   rP  r\  r    s       r   &test_entities_converted_on_the_way_outz?HTMLTreeBuilderSmokeTest.test_entities_converted_on_the_way_out.  sO    >Y``ahiiyyv}}W%%111111r   c                     d}|                     d          }|                     |          }|                     d          }|                    dd          }|                     d          }||k    sJ d S )Nu   <html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-type"/></head><body><p>Sacré bleu!</p></body></html>z
iso-8859-1r   z
ISO-8859-1)r   r    r   )r   unicode_htmliso_latin_htmlr    resultr\  s         r   test_real_iso_8859_documentz4HTMLTreeBuilderSmokeTest.test_real_iso_8859_document4  s    
 ~ &,,\:: yy(( W%%
  ''g>> ??7++ !!!!!!r   c                    d}|                     d          }|                     |          }|                    d          |                    d          k    sJ |                    d          |                    d          k    sJ d S )Nsk   <html><head></head><body><pre>Shift-JISŃR[fBOꂽ{̃t@CłB</pre></body></html>z	shift-jisr   euc_jp)r.   r    r   )r   shift_jis_htmlrf  r    s       r   test_real_shift_jis_documentz5HTMLTreeBuilderSmokeTest.test_real_shift_jis_documentP  s    $ 	 &,,[99yy&& {{7##|':':7'C'CCCCC{{8$$(;(;H(E(EEEEEEEr   c                     d}|                      |d          }|j        dv sJ |                    d          |                    d                              d          k    sJ d S )Ns   <html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1></body></html>	iso8859-8)from_encoding)ro  z
iso-8859-8r   )r    original_encodingr   r.   )r   hebrew_documentr    s      r   test_real_hebrew_documentz2HTMLTreeBuilderSmokeTest.test_real_hebrew_documenta  s     Eyy;  8 8 %)DDDDD{{7##"";//66w??
 
 
 
 
 
r   c                     d}d|z  }|                      |          }|                    dddi          }|d         }d|k    sJ t          |t                    sJ d|                    d	          k    sJ d S )
NzE<meta content="text/html; charset=x-sjis" http-equiv="Content-type"/>j<html><head>
%s
<meta http-equiv="Content-language" content="ja"/></head><body>Shift-JIS markup goes here.r   z
http-equivzContent-typer8  ztext/html; charset=x-sjisztext/html; charset=utf8r   )r    r   rI   r   r   )r   meta_tagrl  r    parsed_metar8  s         r   'test_meta_tag_reflects_current_encodingz@HTMLTreeBuilderSmokeTest.test_meta_tag_reflects_current_encodingn  s    2
7:BC yy(( ii~(FGGi(*g5555 '#<===== )GNN6,B,BBBBBBBr   c                     d}d|z  }|                      |          }|                    dd          }|d         }d|k    sJ t          |t                    sJ d|                    d          k    sJ d S )	Nz'<meta id="encoding" charset="x-sjis" />ru  r   encodingr_   charsetzx-sjisr   )r    r   rI   r   r   )r   rv  rl  r    rw  r{  s         r   3test_html5_style_meta_tag_reflects_current_encodingzLHTMLTreeBuilderSmokeTest.test_html5_style_meta_tag_reflects_current_encoding  s     >7:BC yy(( ii:i66i(7"""" '#<===== ////////r   c                     dD ]Z}|                      |          }t          D ];}|dv r|                    |          }d|v sJ |                    d          |vsJ <[d S )N)sB   <meta charset="utf8"></head><meta id="encoding" charset="utf-8" />idnambcsoem	undefinedstring_escapezstring-escapes   meta charset=""asciir    r	   r   r   r   r    rz  encodeds        r   2test_python_specific_encodings_not_used_in_charsetzKHTMLTreeBuilderSmokeTest.test_python_specific_encodings_not_used_in_charset  s    

 	? 	?F 99V$$D5 ? ?     ++h//)W4444w//w>>>>>?	? 	?r   c                     |                      d          }d|j        d<   d|j                                        k    sJ d S )Nz<a>text</a>r  r   z<a foo="bar">text</a>)r    rq   r.   )r   datas     r   5test_tag_with_no_attributes_can_have_attributes_addedzNHTMLTreeBuilderSmokeTest.test_tag_with_no_attributes_can_have_attributes_added  s?    yy''u&$&--//999999r   c                 n    |                      d          }d|j                                        k    sJ d S )Nz0<body><div><p>text1</p></span>text2</div></body>z)<body><div><p>text1</p>text2</div></body>)r    r6  r.   r   s     r   $test_closing_tag_with_no_opening_tagz=HTMLTreeBuilderSmokeTest.test_closing_tag_with_no_opening_tag  s;    
 yyKLL:di>N>N>P>PPPPPPPr   c                 d    |                      t                    }|                     |           dS z3Test the worst case (currently) for linking issues.Nr    BAD_DOCUMENTrJ   r   s     r   test_worst_casez(HTMLTreeBuilderSmokeTest.test_worst_case  /     yy&&t$$$$$r   N)r   )Crb   rc   rd   __doc__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r   r"  r%  r)  r+  r-  r/  r2  r9  r<  rB  rD  rG  rK  rM  rQ  rT  rV  rX  r]  r`  rb  rd  ri  rm  rs  rx  r|  r  r  r  r  r"   r   r   r   r   2  s        
4 
4 
49 9 9 0 0 0+ + +, , , ,D D D% % %
/ / /$+ + +J J JI I II I I  "  	F 	F 	F - - -, , ,( ( (* * *' ' '/ / /@ @ @"/ / /E E E$	' 	' 	', , ,< < <(
> 
> 
>0 0 00 0 0E E E
 
 

 
 

3 
3 
3? ? ?9 9 9; ; ;2 2 2' ' 'B B B	' 	' 	'/ / /"F F F1 1 14 4 4. . .8 8 8  O O OB B BL L L) ) )c c c9 9 92 2 2" " "8F F F"
 
 
C C C80 0 00? ? ?.: : :
Q Q Q% % % % %r   r   c                       e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )XMLTreeBuilderSmokeTestc                     |                      d          }t          j        |d          }t          j        |          }|j        t
          k    sJ |                                |                                k    sJ d S r   r   r   s       r   r   z9XMLTreeBuilderSmokeTest.test_pickle_and_unpickle_identity  r   r   c                 d    |                      d          }|                                dk    sJ d S )N<root/>s.   <?xml version="1.0" encoding="utf-8"?>
<root/>r   r   s     r   test_docstring_generatedz0XMLTreeBuilderSmokeTest.test_docstring_generated  s3    yy##{{}} RRRRRRRr   c                 j    d}|                      |          }||                    d          k    sJ d S )Ns,   <?xml version="1.0" encoding="utf8"?>
<foo/>r   r   rx   s      r   test_xml_declarationz,XMLTreeBuilderSmokeTest.test_xml_declaration  s;    Eyy  V,,,,,,,,r   c                     d}|                      |          }t          D ];}|dv r|                    |          }d|v sJ |                    d          |vsJ <d S )Ns   <?xml version="1.0"?>
<foo/>r~  s   <?xml version="1.0"?>r  r  r  s        r   :test_python_specific_encodings_not_used_in_xml_declarationzRXMLTreeBuilderSmokeTest.test_python_specific_encodings_not_used_in_xml_declaration  s     6yy  1 	; 	;H    kk(++G+w6666??7++7:::::	; 	;r   c                 j    d}|                      |          }||                    d          k    sJ d S )Ns<   <?xml version="1.0" encoding="utf8"?>
<?PITarget PIContent?>r   r   rx   s      r   r   z3XMLTreeBuilderSmokeTest.test_processing_instruction  s;    Uyy  V,,,,,,,,r   c                 j    d}|                      |          }|                    d          |k    sJ dS )zGA real XHTML document should come out *exactly* the same as it went in.r   r   Nr   rx   s      r   r   z0XMLTreeBuilderSmokeTest.test_real_xhtml_document  s?     yy  {{7##v------r   c                 h    d}|                      |          }||                                k    sJ d S )Ns  <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<parent xmlns="http://ns1/">
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
</child>
</parent>r   r   docr    s      r   test_nested_namespacesz.XMLTreeBuilderSmokeTest.test_nested_namespaces  s9     yy~~dkkmm######r   c                 v    d}t          |d          }d|j        _        |                                }d|v sJ d S )Nz/
  <script type="text/javascript">
  </script>
zlxml-xmlzconsole.log("< < hey > > ");s   &lt; &lt; hey &gt; &gt;)r   r   rV   r   )r   r  r    r  s       r   5test_formatter_processes_script_tag_for_xml_documentszMXMLTreeBuilderSmokeTest.test_formatter_processes_script_tag_for_xml_documents  sJ     S*-- <++--)W444444r   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nu?   <?xml version="1.0" encoding="euc-jp"><root>Sacré bleu!</root>rF  r    rootrV   rx   s      r   rG  z7XMLTreeBuilderSmokeTest.test_can_parse_unicode_document  s4    syy  49#3333333r   c                 X    d}|                      |          }d|j        j        k    sJ d S )NuB   ﻿<?xml version="1.0" encoding="euc-jp"><root>Sacré bleu!</root>rF  r  rx   s      r   1test_can_parse_unicode_document_begining_with_bomzIXMLTreeBuilderSmokeTest.test_can_parse_unicode_document_begining_with_bom   s7     Gyy  49#3333333r   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nz<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>)r    r   rssrx   s      r   test_popping_namespaced_tagz3XMLTreeBuilderSmokeTest.test_popping_namespaced_tag%  s:     Wyy  48}}&&&&&&r   c                 f    |                      d          }|                    d          dk    sJ d S )Nr  latin1s/   <?xml version="1.0" encoding="latin1"?>
<root/>r   r   s     r   (test_docstring_includes_correct_encodingz@XMLTreeBuilderSmokeTest.test_docstring_includes_correct_encoding*  s7    yy##{{8$$([[[[[[[r   c                 j    d}|                      |          }|                    d          |k    sJ dS )z<A large XML document should come out the same as it went in.s4  <?xml version="1.0" encoding="utf-8"?>
<root>0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000</root>r   Nr   rx   s      r   test_large_xml_documentz/XMLTreeBuilderSmokeTest.test_large_xml_document.  s?     yy  {{7##v------r   c                 \    |                      dd           |                      d           d S )Nr   r   z
<p>foo</p>r   r   s    r   9test_tags_are_empty_element_if_and_only_if_they_are_emptyzQXMLTreeBuilderSmokeTest.test_tags_are_empty_element_if_and_only_if_they_are_empty6  s2    '''&&&&&r   c                 z    d}|                      |          }|j        }d|d         k    sJ d|d         k    sJ d S )Nz<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>zhttp://example.com/zxmlns:azhttp://example.net/zxmlns:b)r    r  )r   r   r    r  s       r   test_namespaces_are_preservedz5XMLTreeBuilderSmokeTest.test_namespaces_are_preserved:  sR     wyy  y$Y7777$Y777777r   c                 h    d}|                      |          }t          |j                  |k    sJ d S )NzN<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>)r    r   r   rx   s      r   test_closing_namespaced_tagz3XMLTreeBuilderSmokeTest.test_closing_namespaced_tagA  s7    ayy  46{{f$$$$$$r   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nzs<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>r    r   r   rx   s      r   test_namespaced_attributesz2XMLTreeBuilderSmokeTest.test_namespaced_attributesF  s:     Gyy  48}}&&&&&&r   c                 h    d}|                      |          }t          |j                  |k    sJ d S )Nz<foo xml:lang="fr">bar</foo>r  rx   s      r   (test_namespaced_attributes_xml_namespacez@XMLTreeBuilderSmokeTest.test_namespaced_attributes_xml_namespaceK  s7    /yy  48}}&&&&&&r   c                 &   d}|                      |          }dt          |                    d                    k    sJ dt          |                    d                    k    sJ dt          |                    d                    k    sJ 	 d S )Na  <?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
    xmlns:ns1="http://example.com/ns1"
    xmlns:ns2="http://example.com/ns2">
    <ns1:tag>foo</ns1:tag>
    <ns1:tag>bar</ns1:tag>
    <ns2:tag key="value">baz</ns2:tag>
</Document>
   rX   r   zns1:tagrB   zns2:tag)r    rG   r   r  s      r   test_find_by_prefixed_namez2XMLTreeBuilderSmokeTest.test_find_by_prefixed_nameP  s     yy~~ Ce,,------ Ci00111111Ci00111111<<<r   c                     d}|                      |          }|j        }t          j        |          }|j        |j        k    sJ d S )Nzf<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>)r    documentr   prefix)r   xmlr    rX   	duplicates        r   !test_copy_tag_preserves_namespacez9XMLTreeBuilderSmokeTest.test_copy_tag_preserves_namespacef  sL    2 yy~~mIcNN	 zY-------r   c                 d    |                      t                    }|                     |           dS r  r  r   s     r   r  z'XMLTreeBuilderSmokeTest.test_worst_caseq  r  r   N)rb   rc   rd   r   r  r  r  r   r   r  r  rG  r  r  r  r  r  r  r  r  r  r  r  r  r"   r   r   r  r    sU       0 0 0S S S- - -
; ; ;&- - -
	. 	. 	.	$ 	$ 	$
5 
5 
54 4 4
4 4 4
' ' '
\ \ \. . .' ' '8 8 8% % %
' ' '
' ' '
= = =,	. 	. 	.% % % % %r   r  c                   0    e Zd ZdZd Zd Zd Zd Zd ZdS )HTML5TreeBuilderSmokeTestz2Smoke test for a tree builder that supports HTML5.c                     d S r   r"   r   s    r   r   z2HTML5TreeBuilderSmokeTest.test_real_xhtml_document{  s	     	r   c                 X    d}|                      |          }d|j        j        k    sJ d S )Nz<a>r>  )r    rq   	namespacerx   s      r   test_html_tags_have_namespacez7HTML5TreeBuilderSmokeTest.test_html_tags_have_namespace  s4    yy  -1AAAAAAAr   c                     d}|                      |          }d}||j        j        k    sJ ||j        j        k    sJ d S )Nz<svg><circle/></svg>rA  )r    svgr  circler   r   r    r  s       r   test_svg_tags_have_namespacez6HTML5TreeBuilderSmokeTest.test_svg_tags_have_namespace  sO    'yy  0	DH.....DK1111111r   c                     d}|                      |          }d}||j        j        k    sJ ||j        j        k    sJ d S )Nz<math><msqrt>5</msqrt></math>r@  )r    mathr  msqrtr  s       r   test_mathml_tags_have_namespacez9HTML5TreeBuilderSmokeTest.test_mathml_tags_have_namespace  sO    0yy  8	DI/////DJ0000000r   c                     d}|                      |          }t          |j        d         t                    sJ |j        d         dk    sJ d|j        d         j        j        k    sJ d S )Nz3<?xml version="1.0" encoding="utf-8"?><html></html>r   z$?xml version="1.0" encoding="utf-8"?r   )r    rI   rH   r   r;   r+   rx   s      r   $test_xml_declaration_becomes_commentz>HTML5TreeBuilderSmokeTest.test_xml_declaration_becomes_comment  sr    Fyy  $-*G44444}Q#IIIIIq)6;;;;;;;r   N)	rb   rc   rd   r  r   r  r  r  r  r"   r   r   r  r  x  sh        <<  
B B B
2 2 21 1 1< < < < <r   r  )*r  __license__r   r   	functoolsr   rz   bs4r   bs4.elementr   r   r   r   r	   r
   r   r   r   bs4.builderr   r   r   r   	soupsiever   SOUP_SIEVE_PRESENTImportErrorhtml5libHTML5LIB_PRESENT
lxml.etreelxmlLXML_PRESENTetreeLXML_VERSIONr  objectr   rh   r   r  r  r"   r   r   <module>r     s                   
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
         
 (
------   OOO   L:*LL   LLLL"Jx; x; x; x; x;v x; x; x;v! ! ! ! !6 ! ! !6X
% X
% X
% X
% X
%3 X
% X
% X
%vh% h% h% h% h%2 h% h% h%V!< !< !< !< !< 8 !< !< !< !< !<s6   A AAA   A*)A*.B 	BB