U
    e+1                     @   s,   d Z ddlZddlmZ G dd deZdS )z
A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61.
    N)StemmerIc                   @   s`   e Zd ZdZdZdddZdddZd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )LancasterStemmera/  
    Lancaster Stemmer

        >>> from nltk.stem.lancaster import LancasterStemmer
        >>> st = LancasterStemmer()
        >>> st.stem('maximum')     # Remove "-um" when word is intact
        'maxim'
        >>> st.stem('presumably')  # Don't remove "-um" when word is not intact
        'presum'
        >>> st.stem('multiply')    # No action taken if word ends with "-ply"
        'multiply'
        >>> st.stem('provision')   # Replace "-sion" with "-j" to trigger "j" set of rules
        'provid'
        >>> st.stem('owed')        # Word starting with vowel must contain at least 2 letters
        'ow'
        >>> st.stem('ear')         # ditto
        'ear'
        >>> st.stem('saying')      # Words starting with consonant must contain at least 3
        'say'
        >>> st.stem('crying')      #     letters and one of those letters must be a vowel
        'cry'
        >>> st.stem('string')      # ditto
        'string'
        >>> st.stem('meant')       # ditto
        'meant'
        >>> st.stem('cement')      # ditto
        'cem'
        >>> st_pre = LancasterStemmer(strip_prefix_flag=True)
        >>> st_pre.stem('kilometer') # Test Prefix
        'met'
        >>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t."))
        >>> st_custom.stem("ness") # Change s to t
        'nest'
    )szai*2.za*1.zbb1.zcity3s.zci2>zcn1t>zdd1.zdei3y>zdeec2ss.zdee1.zde2>zdooh4>ze1>zfeil1v.zfi2>zgni3>zgai3y.zga2>zgg1.zht*2.z	hsiug5ct.zhsi3>zi*1.zi1y>zji1d.zjuf1s.zju1d.zjo1d.zjeh1r.zjrev1t.zjsim2t.zjn1d.zj1s.zlbaifi6.zlbai4y.zlba3>zlbi3.zlib2l>zlc1.zlufi4y.zluf3>zlu2.zlai3>zlau3>zla2>zll1.zmui3.zmu*2.zmsi3>zmm1.znois4j>znoix4ct.znoi3>znai3>zna2>znee0.zne2>znn1.zpihs4>zpp1.zre2>zrae0.zra2.zro2>zru2>zrr1.zrt1>zrei3y>zsei3y>zsis2.zsi2>zssen4>zss0.zsuo3>zsu*2.zs*1>zs0.z	tacilp4y.zta2>ztnem4>ztne3>ztna3>ztpir2b.ztpro2b.ztcud1.ztpmus2.ztpec2iv.ztulo2v.ztsis0.ztsi3>ztt1.zuqi3.zugo1.zvis3j>zvie0.zvi2>zylb1>zyli3y>zylp0.zyl2>zygo1.zyhp1.zymo1.zypo1.zyti3>zyte3>zytl2.zyrtsi5.zyra3>zyro3>zyfi3.zycn2t>zyca3>zzi2>zzy1s.NFc                 C   s    i | _ || _|r|n| j| _dS )z,Create an instance of the Lancaster stemmer.N)rule_dictionary_strip_prefixdefault_rule_tuple_rule_tuple)self
rule_tupleZstrip_prefix_flag r
   T/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/stem/lancaster.py__init__   s    zLancasterStemmer.__init__c                 C   sz   |r|n| j }td}i | _|D ]R}||s@td| d|dd }|| jkrh| j| | q"|g| j|< q"dS )a(  Validate the set of rules used in this stemmer.

        If this function is called as an individual method, without using stem
        method, rule_tuple argument will be compiled into self.rule_dictionary.
        If this function is called within stem, self._rule_tuple will be used.

        z^[a-z]+\*?\d[a-z]*[>\.]?$z	The rule z is invalidr      N)r   recompiler   match
ValueErrorappend)r   r	   
valid_ruleruleZfirst_letterr
   r
   r   
parseRules   s    	


zLancasterStemmer.parseRulesc                 C   s:   |  }| jr| |n|}|}| js.|   | ||S )z(Stem a word using the Lancaster stemmer.)lowerr   _LancasterStemmer__stripPrefixr   r   _LancasterStemmer__doStemming)r   wordintact_wordr
   r
   r   stem   s    zLancasterStemmer.stemc                 C   s
  t d}d}|r| |}|dk s4|| | jkr:d}qd}| j||  D ]}||}|rL| \}	}
}}}t|}||	ddd rL|
r||kr| ||r| 	|||}d}|dkrd} qqL| ||rL| 	|||}d}|dkrd} qqL|dkrd}q|S )z Perform the actual word stemmingz#^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$Tr   FN.)
r   r    _LancasterStemmer__getLastLetterr   r   groupsintendswith_LancasterStemmer__isAcceptable_LancasterStemmer__applyRule)r   r   r   r   proceedZlast_letter_positionZrule_was_appliedr   Z
rule_matchZending_stringZintact_flagremove_totalappend_stringZ	cont_flagr
   r
   r   Z__doStemming   sb    


     zLancasterStemmer.__doStemmingc                 C   s0   d}t t|D ]}||  r&|}q q,q|S )zHGet the zero-based index of the last alphabetic character in this stringr   )rangelenisalpha)r   r   Zlast_letterpositionr
   r
   r   Z__getLastLetter  s    z LancasterStemmer.__getLastLetterc                 C   s\   d}|d dkr&t || dkrXd}n2t || dkrX|d dkrHd}n|d dkrXd}|S )z1Determine if the word is acceptable for stemming.Fr   Zaeiouy   T   r   r(   )r   r   r%   Zword_is_acceptabler
   r
   r   Z__isAcceptable%  s    zLancasterStemmer.__isAcceptablec                 C   s(   t || }|d| }|r$||7 }|S )z#Apply the stemming rule to the wordr   r-   )r   r   r%   r&   Znew_word_lengthr
   r
   r   Z__applyRule6  s
    zLancasterStemmer.__applyRulec                 C   s,   dD ]"}| |r|t|d   S q|S )zYRemove prefix from a word.

        This function originally taken from Whoosh.

        )	ZkilomicroZmilliZintraZultramegaZnanoZpicoZpseudoN)
startswithr(   )r   r   prefixr
   r
   r   Z__stripPrefixA  s    
zLancasterStemmer.__stripPrefixc                 C   s   dS )Nz<LancasterStemmer>r
   )r   r
   r
   r   __repr__V  s    zLancasterStemmer.__repr__)NF)N)__name__
__module____qualname____doc__r   r   r   r   r   r   r"   r#   r   r2   r
   r
   r
   r   r      s   $v
	
@
r   )r6   r   Znltk.stem.apir   r   r
   r
   r
   r   <module>   s   