U
    ƒeÙ)  ã                   @   s^   d Z ddlmZ ddlmZ ddlmZmZmZm	Z	 G dd„ deƒZ
dd„ Zed	krZeƒ  d
S )aê  
A classifier based on the Naive Bayes algorithm.  In order to find the
probability for a label, this algorithm first uses the Bayes rule to
express P(label|features) in terms of P(label) and P(features|label):

|                       P(label) * P(features|label)
|  P(label|features) = ------------------------------
|                              P(features)

The algorithm then makes the 'naive' assumption that all features are
independent, given the label:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                                         P(features)

Rather than computing P(features) explicitly, the algorithm just
calculates the numerator for each label, and normalizes them so they
sum to one:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
é    )Údefaultdict)ÚClassifierI)ÚDictionaryProbDistÚELEProbDistÚFreqDistÚsum_logsc                   @   sT   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zddd„Zddd„Z	e
efdd„ƒZdS )ÚNaiveBayesClassifiera  
    A Naive Bayes classifier.  Naive Bayes classifiers are
    paramaterized by two probability distributions:

      - P(label) gives the probability that an input will receive each
        label, given no information about the input's features.

      - P(fname=fval|label) gives the probability that a given feature
        (fname) will receive a given value (fval), given that the
        label (label).

    If the classifier encounters an input with a feature that has
    never been seen with any label, then rather than assigning a
    probability of 0 to all labels, it will ignore that feature.

    The feature value 'None' is reserved for unseen feature values;
    you generally should not use 'None' as a feature value for one of
    your own features.
    c                 C   s   || _ || _t| ¡ ƒ| _dS )a=  
        :param label_probdist: P(label), the probability distribution
            over labels.  It is expressed as a ``ProbDistI`` whose
            samples are labels.  I.e., P(label) =
            ``label_probdist.prob(label)``.

        :param feature_probdist: P(fname=fval|label), the probability
            distribution for feature values, given labels.  It is
            expressed as a dictionary whose keys are ``(label, fname)``
            pairs and whose values are ``ProbDistI`` objects over feature
            values.  I.e., P(fname=fval|label) =
            ``feature_probdist[label,fname].prob(fval)``.  If a given
            ``(label,fname)`` is not a key in ``feature_probdist``, then
            it is assumed that the corresponding P(fname=fval|label)
            is 0 for all values of ``fval``.
        N)Ú_label_probdistÚ_feature_probdistÚlistÚsamplesÚ_labels)ÚselfÚlabel_probdistÚfeature_probdist© r   úY/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/classify/naivebayes.pyÚ__init__@   s    zNaiveBayesClassifier.__init__c                 C   s   | j S ©N)r   )r   r   r   r   ÚlabelsU   s    zNaiveBayesClassifier.labelsc                 C   s   |   |¡ ¡ S r   )Úprob_classifyÚmax)r   Ú
featuresetr   r   r   ÚclassifyX   s    zNaiveBayesClassifier.classifyc                 C   sÒ   |  ¡ }t| ¡ ƒD ](}| jD ]}||f| jkr qq||= qi }| jD ]}| j |¡||< qH| jD ]^}| ¡ D ]P\}}||f| jkr¬| j||f }||  | |¡7  < qp||  tg ƒ7  < qpqdt	|dddS )NT)Ú	normalizeÚlog)
Úcopyr   Úkeysr   r
   r	   ÚlogprobÚitemsr   r   )r   r   ÚfnameÚlabelr   ÚfvalZfeature_probsr   r   r   r   [   s     


z"NaiveBayesClassifier.prob_classifyé
   c              	      sð   | j ‰ tdƒ |  |¡D ]Ò\‰‰‡ ‡‡fdd„‰t‡ ‡‡fdd„| jD ƒ‡fdd„dd	}t|ƒd
krhq|d }|d }ˆ |ˆf  ˆ¡dkr”d}n(dˆ |ˆf  ˆ¡ˆ |ˆf  ˆ¡  }tdˆˆd| d d… d| d d… |f ƒ qd S )NzMost Informative Featuresc                    s   ˆ | ˆf   ˆ¡S r   )Úprob)Úl©Úcpdistr    r"   r   r   Ú	labelprobƒ   s    zFNaiveBayesClassifier.show_most_informative_features.<locals>.labelprobc                 3   s&   | ]}ˆˆ |ˆf   ¡ kr|V  qd S r   )r   )Ú.0r%   r&   r   r   Ú	<genexpr>‡   s      zFNaiveBayesClassifier.show_most_informative_features.<locals>.<genexpr>c                    s   ˆ | ƒ | fS r   r   )Úelement)r(   r   r   Ú<lambda>ˆ   ó    zENaiveBayesClassifier.show_most_informative_features.<locals>.<lambda>T)ÚkeyÚreverseé   r   éÿÿÿÿZINFz%8.1fz"%24s = %-14r %6s : %-6s = %s : 1.0z%sé   )r
   ÚprintÚmost_informative_featuresÚsortedr   Úlenr$   )r   Únr   Zl0Úl1Úratior   )r'   r    r"   r(   r   Úshow_most_informative_features|   s.    
ý"ÿ$ÿÿz3NaiveBayesClassifier.show_most_informative_featureséd   c           	         sØ   t | dƒr| jd|… S tƒ }tdd„ ƒ‰ tdd„ ƒ‰| j ¡ D ]p\\}}}| ¡ D ]Z}||f}| |¡ | |¡}t	|ˆ | ƒˆ |< t
|ˆ| ƒˆ|< ˆ| dkrT| |¡ qTq@t|‡ ‡fdd„d| _| jd|… S )	a—  
        Return a list of the 'most informative' features used by this
        classifier.  For the purpose of this function, the
        informativeness of a feature ``(fname,fval)`` is equal to the
        highest value of P(fname=fval|label), for any label, divided by
        the lowest value of P(fname=fval|label), for any label:

        |  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
        Ú_most_informative_featuresNc                   S   s   dS )Ng        r   r   r   r   r   r,   «   r-   z@NaiveBayesClassifier.most_informative_features.<locals>.<lambda>c                   S   s   dS )Ng      ð?r   r   r   r   r   r,   ¬   r-   r   c                    s0   ˆ|  ˆ |   | d | d dkt | d ƒ ¡ fS )Nr   r0   )NFT)ÚstrÚlower)Zfeature_©ZmaxprobZminprobr   r   r,   ¼   s
    
ü)r.   )Úhasattrr<   Úsetr   r
   r   r   Úaddr$   r   ÚminÚdiscardr5   )	r   r7   Úfeaturesr!   r    Úprobdistr"   ÚfeatureÚpr   r?   r   r4   š   s&    



þ	z.NaiveBayesClassifier.most_informative_featuresc                 C   s*  t ƒ }tt ƒ}ttƒ}tƒ }|D ]Z\}}||  d7  < | ¡ D ]8\}	}
|||	f |
  d7  < ||	  |
¡ | |	¡ q@q |D ]\}|| }|D ]J}	|||	f  ¡ }|| dkr|||	f d  || 7  < ||	  d¡ qq€||ƒ}i }| ¡ D ],\\}}	}||t||	 ƒd}||||	f< qò| ||ƒS )z‹
        :param labeled_featuresets: A list of classified featuresets,
            i.e., a list of tuples ``(featureset, label)``.
        r0   r   N)Zbins)r   r   rA   r   rB   ÚNr6   )ÚclsZlabeled_featuresetsZ	estimatorZlabel_freqdistZfeature_freqdistZfeature_valuesÚfnamesr   r!   r    r"   Znum_samplesÚcountr   r   ZfreqdistrF   r   r   r   ÚtrainÅ   s.    zNaiveBayesClassifier.trainN)r#   )r;   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r:   r4   Úclassmethodr   rM   r   r   r   r   r   +   s   !

+r   c                  C   s"   ddl m}  | tjƒ}| ¡  d S )Nr   )Ú
names_demo)Znltk.classify.utilrS   r   rM   r:   )rS   Ú
classifierr   r   r   Údemoü   s    
rU   Ú__main__N)rQ   Úcollectionsr   Znltk.classify.apir   Znltk.probabilityr   r   r   r   r   rU   rN   r   r   r   r   Ú<module>   s    R