B
    He :                 @   s   d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ G dd dZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N)ListOptionalUnion   )CharSetGroupProber)CharSetProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MacRomanProber)MBCSGroupProber)
ResultDict)SBCSGroupProber)UTF1632Proberc            	   @   s   e Zd ZdZdZedZedZedZ	dddd	d
ddddZ
ddddddddZejdfeeddddZeedddZeedddZeee dddZddd d!Zeeef dd"d#d$Zedd%d&ZdS )'UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13zISO-8859-11ZGB18030CP949zUTF-16)asciiz
iso-8859-1ztis-620z
iso-8859-9gb2312zeuc-krzutf-16leFN)lang_filtershould_rename_legacyreturnc             C   sd   d | _ d | _g | _d dd d| _d| _d| _tj| _d| _	|| _
tt| _d| _|| _|   d S )Ng        )encoding
confidencelanguageF    )_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_datar   
PURE_ASCII_input_state
_last_charr   logging	getLogger__name__logger_has_win_bytesr   reset)selfr   r    r-   y/var/www/downstreamdata.science/rtclock/rtclock-venv/lib/python3.7/site-packages/pip/_vendor/chardet/universaldetector.py__init__d   s    
zUniversalDetector.__init__)r   c             C   s   | j S )N)r$   )r,   r-   r-   r.   input_state{   s    zUniversalDetector.input_statec             C   s   | j S )N)r*   )r,   r-   r-   r.   has_win_bytes   s    zUniversalDetector.has_win_bytesc             C   s   | j S )N)r   )r,   r-   r-   r.   charset_probers   s    z!UniversalDetector.charset_probersc             C   sj   dddd| _ d| _d| _d| _tj| _d| _| jr>| j	  | j
rN| j
	  x| jD ]}|	  qVW dS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Ng        )r   r   r   Fr   )r    r!   r"   r*   r   r#   r$   r%   r   r+   r   r   )r,   proberr-   r-   r.   r+      s    

zUniversalDetector.reset)byte_strr   c             C   s  | j r
dS |sdS t|ts$t|}| js|tjrFdddd| _nv|tjtj	frhdddd| _nT|drdddd| _n:|d	rd
ddd| _n |tj
tjfrdddd| _d| _| jd dk	rd| _ dS | jtjkr(| j|rtj| _n*| jtjkr(| j| j| r(tj| _|dd | _| jsFt | _| jjtjkr| j|tjkr| jj| j dd| _d| _ dS | jtjkr| jst| j | _| j|tjkr| jj| j | jj!d| _d| _ n| jtjkr| j"sBt#| j g| _"| j t$j%@ r&| j"&t'  | j"&t(  | j"&t)  x@| j"D ]6}||tjkrJ|j| |j!d| _d| _ P qJW | j*|rd| _+dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIGg      ? )r   r   r   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143zUTF-16Tr   ),r!   
isinstance	bytearrayr"   
startswithcodecsBOM_UTF8r    BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr$   r   r#   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr%   	ESC_ASCIIr   r   stater
   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer   r   r   r   r   r   r	   NON_CJKappendr   r   r   WIN_BYTE_DETECTORr*   )r,   r4   r3   r-   r-   r.   rG      s    




zUniversalDetector.feedc       	   	   C   s  | j r| jS d| _ | js&| jd n| jtjkrBdddd| _n| jtjkrd}d}d}x,| j	D ]"}|snqd|
 }||krd|}|}qdW |r|| jkr|j}|dk	st| }|
 }|d	r| jr| j||}| jr| j|pd |}|||jd| _| j tjkr| jd
 dkr| jd xn| j	D ]d}|sLq>t|trxF|jD ] }| jd|j|j|
  q`W n| jd|j|j|
  q>W | jS )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!r   g      ?r5   )r   r   r   Ng        ziso-8859r   z no probers hit minimum thresholdz%s %s confidence = %s)r!   r    r"   r)   debugr$   r   r#   rB   r   rJ   MINIMUM_THRESHOLDrI   AssertionErrorlowerr9   r*   ISO_WIN_MAPgetr   
LEGACY_MAPr   getEffectiveLevelr&   DEBUGr7   r   probers)	r,   prober_confidencemax_prober_confidence
max_proberr3   rI   lower_charset_namer   group_proberr-   r-   r.   close  sh    	
zUniversalDetector.close)r(   
__module____qualname____doc__rO   recompiler@   rC   rM   rR   rT   r	   ALLboolr/   propertyintr0   r1   r   r   r2   r+   r   bytesr8   rG   r   r]   r-   r-   r-   r.   r   8   sB   


 r   )r`   r:   r&   ra   typingr   r   r   charsetgroupproberr   charsetproberr   enumsr   r	   r
   	escproberr   latin1proberr   Zmacromanproberr   mbcsgroupproberr   Z
resultdictr   sbcsgroupproberr   Zutf1632proberr   r   r-   r-   r-   r.   <module>$   s   