+
    Ĝi!                        R t ^ RIt^ RIt^ RIHt ^ RIHt ^ RIH	t	 ^ RI
HtHtHt  ]t ^ RIHt  ^ RIHt  ! R R	]4      t ^ R
IHt  ! R R]4      t]! 4       tR tRR ltRR ltRR ltRR lt RR lt!R t"]! 4       t#R#   ] d    ]]3t Lgi ; i  ] d
    ^ RIHt  Lsi ; i  ] d
    ^ RIHt  Li ; i  ] d     Lhi ; i)z?
An interface to html5lib that mimics the lxml.html interface.
N)
HTMLParser)TreeBuilder)etree)ElementXHTML_NAMESPACE_contains_block_level_tag)urlopen)urlparsec                   .   a  ] tR t^t o RtRR ltRtV tR# )r   z*An html5lib HTML parser with lxml as tree.c                D    \         P                  ! V 3R VR\        /VB  R# stricttreeN)_HTMLParser__init__r   selfr   kwargss   &&,^/Users/mibo/.openclaw/workspace/.venv-ak/lib/python3.14/site-packages/lxml/html/html5parser.pyr   HTMLParser.__init__   s    TM&M{MfM     NF__name__
__module____qualname____firstlineno____doc__r   __static_attributes____classdictcell____classdict__s   @r   r   r      s     4N Nr   r   )XHTMLParserc                   .   a  ] tR t^'t o RtRR ltRtV tR# )r#   z+An html5lib XHTML Parser with lxml as tree.c                D    \         P                  ! V 3R VR\        /VB  R# r   )_XHTMLParserr   r   r   s   &&,r   r   XHTMLParser.__init__*   s    !!$RvRKR6Rr   r   Nr   r   r!   s   @r   r#   r#   '   s     9	S 	Sr   r#   c                 j    V P                  V4      pVe   V# V P                  R\        : RV: 24      # )N{})findr   )r   tagelems   && r   	_find_tagr.   0   s.    99S>D99#677r   c                    \        V \        4      '       g   \        R4      hVf   \        p/ pVf   \        V \        4      '       d   RpVe   WR&   VP
                  ! V 3/ VB P                  4       # )z
Parse a whole document into a string.

If `guess_charset` is true, or if the input is not Unicode but a
byte string, the `chardet` library will perform charset guessing
on the string.
string requiredT
useChardet)
isinstance_strings	TypeErrorhtml_parserbytesparsegetroot)htmlguess_charsetparseroptionss   &&& r   document_fromstringr=   7   sp     dH%%)**~GD%!8!8   -<<((0022r   c                   \        V \        4      '       g   \        R4      hVf   \        p/ pVf   \        V \        4      '       d   RpVe   W$R&   VP
                  ! V R3/ VB pV'       dj   \        V^ ,          \        4      '       dM   V'       dE   V^ ,          P                  4       '       d%   \        P                  ! RV^ ,          ,          4      hV^  V# )aH  Parses several HTML elements, returning a list of elements.

The first item in the list may be a string.  If no_leading_text is true,
then it will be an error if there is leading text, and it will always be
a list of only elements.

If `guess_charset` is true, the `chardet` library will perform charset
guessing on the string.
r0   Fr1   divzThere is leading text: %r)	r2   r3   r4   r5   r6   parseFragmentstripr   ParserError)r9   no_leading_textr:   r;   r<   childrens   &&&&  r   fragments_fromstringrE   O   s     dH%%)**~GD%!8!8   -##D%;7;HJx{H55{  ""''(C(0)4 5 5Or   c                   \        V \        4      '       g   \        R4      h\        V4      p\	        WVV'       * R7      pV'       dm   \        V\        4      '       g   Rp\        V4      pV'       d@   \        V^ ,          \        4      '       d   V^ ,          Vn        V^  VP                  V4       V# V'       g   \        P                  ! R4      h\        V4      ^8  d   \        P                  ! R4      hV^ ,          pVP                  '       dH   VP                  P                  4       '       d(   \        P                  ! RVP                  ,          4      hRVn        V# )a  Parses a single HTML element; it is an error if there is more than
one element, or if anything but whitespace precedes or follows the
element.

If 'create_parent' is true (or is a tag name) then a parent node
will be created to encapsulate the HTML in a single element.  In
this case, leading or trailing text is allowed.

If `guess_charset` is true, the `chardet` library will perform charset
guessing on the string.
r0   )r:   r;   rC   r?   zNo elements foundzMultiple elements foundzElement followed by text: %rN)r2   r3   r4   boolrE   r   textextendr   rB   lentailrA   )r9   create_parentr:   r;   accept_leading_textelementsnew_rootresults   &&&&    r   fragment_fromstringrQ   q   s    dH%%)**}-#&//1H -22!M=)(1+x00 (QKOOH% 344
8}q 9::a[F{{{v{{((** > LMMFKMr   c                   \        V \        4      '       g   \        R4      h\        WVR7      pV R,          p\        V\        4      '       d   VP                  RR4      pVP                  4       P                  4       pVP                  R4      '       g   VP                  R4      '       d   V# \        VR4      p\        V4      '       d   V# \        VR	4      p\        V4      ^8X  d|   VP                  '       d!   VP                  P                  4       '       gJ   VR,          P                  '       d(   VR,          P                  P                  4       '       g
   V^ ,          # \        V4      '       d
   R
Vn        V# RVn        V# )a  Parse the html, returning a single element/document.

This tries to minimally parse the chunk of text, without knowing if it
is a fragment or a document.

'base_url' will set the document's base_url attribute (and the tree's
docinfo.URL)

If `guess_charset` is true, or if the input is not Unicode but a
byte string, the `chardet` library will perform charset guessing
on the string.
r0   )r;   r:   :N2   Nasciireplacez<htmlz	<!doctypeheadbodyr?   span)r2   r3   r4   r=   r6   decodelstriplower
startswithr.   rJ   rH   rA   rK   r   r,   )r9   r:   r;   docstartrV   rW   s   &&&    r   
fromstringr`      s2    dH%%)**
d,9;C IE% Wi0LLN  "E  E$4$4[$A$A
S&!D 4yy
S&!D 	D	Q			1B1Bbd2hmm&9&9&;&;Aw
 !&& K Kr   c                    Vf   \         p\        V \        4      '       g
   T pVf   RpM5\        V 4      '       d   \	        V 4      pVf   RpM\        V R4      pVf   Rp/ pV'       d   WR&   VP                  ! V3/ VB # )a
  Parse a filename, URL, or file-like object into an HTML document
tree.  Note: this returns a tree, not an element.  Use
``parse(...).getroot()`` to get the document root.

If ``guess_charset`` is true, the ``useChardet`` option is passed into
html5lib to enable character detection.  This option is on by default
when parsing from URLs, off by default when parsing from file(-like)
objects (which tend to return Unicode more often than not), and on by
default when parsing from a file path (which is read in binary mode).
FTrbr1   )r5   r2   r3   _looks_like_urlr   openr7   )filename_url_or_filer:   r;   fpr<   s   &&&  r   r7   r7      s     ~*H55! !M	-	.	.)*  M&-  MG  -<<&g&&r   c                     \        V 4      ^ ,          pV'       g   R# \        P                  R8X  d(   V\        P                  9   d   \        V4      ^8X  d   R# R# )    Fwin32T)r	   sysplatformstringascii_lettersrJ   )strschemes   & r   rc   rc      sB    c]1F
,,'
!f***Fq r   )NN)FNN)$r   rj   rl   html5libr   r    html5lib.treebuilders.etree_lxmlr   lxmlr   	lxml.htmlr   r   r   
basestringr3   	NameErrorr6   rn   urllib2r   ImportErrorurllib.requestr	   urllib.parser#   r&   xhtml_parserr.   r=   rE   rQ   r`   r7   rc   r5   r   r   r   <module>r{      s      . 8  I IH'&!
N N!4Sl S =L830D)X3l!'H
 lk  s|H  '&'  &%&  		sE   B B B( B; BBB%$B%(B87B8;CC