
    P'g :                         d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ  G d d      Zy)a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N)ListOptionalUnion   )CharSetGroupProber)CharSetProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MacRomanProber)MBCSGroupProber)
ResultDict)SBCSGroupProber)UTF1632Proberc            	       N   e Zd ZdZdZ ej                  d      Z ej                  d      Z ej                  d      Z	dddd	d
ddddZ
ddddddddZej                  dfdededdfdZedefd       Zedefd       Zedee   fd       Zd!dZdeeef   ddfdZdefd Zy)"UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8
iso-8859-9ziso-8859-13zISO-8859-11GB18030CP949UTF-16)asciir   ztis-620r   gb2312zeuc-krzutf-16leFlang_filtershould_rename_legacyreturnNc                    d | _         d | _        g | _        d dd d| _        d| _        d| _        t        j                  | _        d| _	        || _
        t        j                  t              | _        d| _        || _        | j#                          y )N        encoding
confidencelanguageF    )_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_datar	   
PURE_ASCII_input_state
_last_charr   logging	getLogger__name__logger_has_win_bytesr   reset)selfr   r   s      ]/var/www/html/knws/venv/lib/python3.12/site-packages/pip/_vendor/chardet/universaldetector.py__init__zUniversalDetector.__init__d   s    
 @D 8<57#

 	&11&''1#$8!

r%   c                     | j                   S N)r-   r5   s    r6   input_statezUniversalDetector.input_state{   s       r%   c                     | j                   S r9   )r3   r:   s    r6   has_win_byteszUniversalDetector.has_win_bytes   s    """r%   c                     | j                   S r9   )r(   r:   s    r6   charset_probersz!UniversalDetector.charset_probers   s    $$$r%   c                 V   dddd| _         d| _        d| _        d| _        t        j
                  | _        d| _        | j                  r| j                  j                          | j                  r| j                  j                          | j                  D ]  }|j                           y)z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Nr    r!   Fr%   )r)   r*   r+   r3   r	   r,   r-   r.   r&   r4   r'   r(   )r5   probers     r6   r4   zUniversalDetector.reset   s     $(sM	#&11##$$**,  &&(++ 	FLLN	r%   byte_strc                 V	   | j                   ry|syt        |t              st        |      }| j                  s|j	                  t
        j                        rdddd| _        n|j	                  t
        j                  t
        j                  f      rdddd| _        nt|j	                  d      rdddd| _        nW|j	                  d	      rd
ddd| _        n:|j	                  t
        j                  t
        j                  f      rdddd| _        d| _        | j                  d   d| _         y| j                  t        j                  k(  r| j                  j!                  |      rt        j"                  | _        nZ| j                  t        j                  k(  r=| j$                  j!                  | j&                  |z         rt        j(                  | _        |dd | _        | j*                  st-               | _        | j*                  j.                  t0        j2                  k(  rk| j*                  j5                  |      t0        j6                  k(  r?| j*                  j8                  | j*                  j;                         dd| _        d| _         y| j                  t        j(                  k(  r| j<                  st?        | j@                        | _        | j<                  j5                  |      t0        j6                  k(  rS| j<                  j8                  | j<                  j;                         | j<                  jB                  d| _        d| _         yy| j                  t        j"                  k(  r:| jD                  stG        | j@                        g| _"        | j@                  tH        jJ                  z  r#| jD                  jM                  tO                      | jD                  jM                  tQ                      | jD                  jM                  tS                      | jD                  D ]Z  }|j5                  |      t0        j6                  k(  s&|j8                  |j;                         |jB                  d| _        d| _          n | jT                  j!                  |      rd| _+        yyy)a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r!   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143r   Tr"   ),r*   
isinstance	bytearrayr+   
startswithcodecsBOM_UTF8r)   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr-   r	   r,   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr.   	ESC_ASCIIr'   r   stater   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer&   r   r   r$   r(   r   r
   NON_CJKappendr   r   r   WIN_BYTE_DETECTORr3   )r5   rB   rA   s      r6   rW   zUniversalDetector.feed   s    99(I. *H ~~""6??3 !,"% "
 $$f&9&96;N;N%OP ,43TVW$$%89 !9"% "	 $$%89 !9"% "	 $$fmmV]]%CD ,43TVW!DN{{:&2 	 
 5 55&&--h7$.$8$8!!!Z%:%::%%,,T__x-GH$.$8$8!"23- ###0?D %%)?)??##((2l6K6KK $ 4 4 A A"&"6"6"E"E"G "
 !	 
 4 44+++;D<L<L+M('',,X6,:O:OO $ 8 8 E E"&":":"I"I"K $ 8 8 A A
 !	 P *"6"66(()89I9I)J(K%##n&<&<<))001BC%%,,\^<%%,,^-=>// ;;x(L,A,AA$*$7$7&,&;&;&=$*OO#DK
 !%DI %%,,X6&*# 7# 7r%   c           	      H   | j                   r| j                  S d| _         | j                  s| j                  j	                  d       nD| j
                  t        j                  k(  rdddd| _        n| j
                  t        j                  k(  rd}d}d}| j                  D ]  }|s|j                         }||kD  s|}|}! |r|| j                  kD  r|j                  }|J |j                         }|j                         }|j                  d	      r(| j                  r| j                   j#                  ||      }| j$                  r.| j&                  j#                  |xs dj                         |      }|||j(                  d| _        | j                  j+                         t,        j.                  k  r| j                  d
   | j                  j	                  d       | j                  D ]  }|st1        |t2              rR|j4                  D ]B  }| j                  j	                  d|j                  |j(                  |j                                D h| j                  j	                  d|j                  |j(                  |j                                 | j                  S )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!r   rD   rE   r!   Nr    ziso-8859r"   z no probers hit minimum thresholdz%s %s confidence = %s)r*   r)   r+   r2   debugr-   r	   r,   rR   r(   rZ   MINIMUM_THRESHOLDrY   lowerrI   r3   ISO_WIN_MAPgetr   
LEGACY_MAPr$   getEffectiveLevelr/   DEBUGrG   r   probers)	r5   prober_confidencemax_prober_confidence
max_proberrA   rY   lower_charset_namer#   group_probers	            r6   closezUniversalDetector.close  sp    99;;	~~KK12 *"7"77'.crRDK *"6"66 $$'!J// ($*$9$9$;!$'<<,=)!'J( 4t7M7MM)66#///%1%7%7%9"'668
 &00<**'+'7'7';';.( ,,#'??#6#6%+224l$L !-", * 3 3 ;;((*gmm;{{:&.!!"DE$($9$9 L' !,0BC&2&:&: F KK-- 7 & 3 3 & & 5 5 7	 ))3(55(11(779	$ {{r%   )r   N)r1   
__module____qualname____doc__r`   recompilerP   rS   r]   rb   rd   r
   ALLboolr7   propertyintr;   r=   r   r   r?   r4   r   bytesrH   rW   r   rm    r%   r6   r   r   8   s7     #N32::l+L"

>2$$$$$$$%	K  $ $J '5&8&8%*# # 
	. !S ! ! #t # # %m!4 % %&A+U5)#34 A+ A+FMz Mr%   r   )rp   rJ   r/   rq   typingr   r   r   charsetgroupproberr   charsetproberr   enumsr	   r
   r   	escproberr   latin1proberr   macromanproberr   mbcsgroupproberr   
resultdictr   sbcsgroupproberr   utf1632proberr   r   rx   r%   r6   <module>r      sH   8   	 ( ( 2 ( ; ; ' & * , " , (r rr%   