U
    eR                    @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 G dd deZG d	d
 d
eZG dd deejZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZG d#d$ d$eZG d%d& d&eZG d'd( d(eZG d)d* d*eZG d+d, d,eZG d-d. d.eZd/d0 ZdS )1z
Snowball stemmers

This module provides a port of the Snowball stemmers
developed by Martin Porter.

There is also a demo function: `snowball.demo()`.

    N)	stopwords)porter)StemmerI)prefix_replacesuffix_replacec                   @   s&   e Zd ZdZdZd	ddZdd ZdS )
SnowballStemmera  
    Snowball Stemmer

    The following languages are supported:
    Arabic, Danish, Dutch, English, Finnish, French, German,
    Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian,
    Spanish and Swedish.

    The algorithm for English is documented here:

        Porter, M. "An algorithm for suffix stripping."
        Program 14.3 (1980): 130-137.

    The algorithms have been developed by Martin Porter.
    These stemmers are called Snowball, because Porter created
    a programming language with this name for creating
    new stemming algorithms. There is more information available
    at http://snowball.tartarus.org/

    The stemmer is invoked as shown below:

    >>> from nltk.stem import SnowballStemmer # See which languages are supported
    >>> print(" ".join(SnowballStemmer.languages)) # doctest: +NORMALIZE_WHITESPACE
    arabic danish dutch english finnish french german hungarian
    italian norwegian porter portuguese romanian russian
    spanish swedish
    >>> stemmer = SnowballStemmer("german") # Choose a language
    >>> stemmer.stem("Autobahnen") # Stem a word
    'autobahn'

    Invoking the stemmers that way is useful if you do not know the
    language to be stemmed at runtime. Alternatively, if you already know
    the language, then you can invoke the language specific stemmer directly:

    >>> from nltk.stem.snowball import GermanStemmer
    >>> stemmer = GermanStemmer()
    >>> stemmer.stem("Autobahnen")
    'autobahn'

    :param language: The language whose subclass is instantiated.
    :type language: str or unicode
    :param ignore_stopwords: If set to True, stopwords are
                             not stemmed and returned unchanged.
                             Set to False by default.
    :type ignore_stopwords: bool
    :raise ValueError: If there is no stemmer for the specified
                           language, a ValueError is raised.
    arabicdanishdutchenglishfinnishfrenchgerman	hungarianitalian	norwegianr   
portugueseromanianrussianspanishswedishFc                 C   sN   || j krtd| dt | d  }||| _| jj| _| jj| _d S )NzThe language 'z' is not supported.ZStemmer)	languages
ValueErrorglobals
capitalizestemmerstemr   )selflanguageignore_stopwordsZstemmerclass r!   S/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/stem/snowball.py__init__h   s    


zSnowballStemmer.__init__c                 C   s   | j | |S N)r   r   r   tokenr!   r!   r"   r   p   s    zSnowballStemmer.stemN)F)__name__
__module____qualname____doc__r   r#   r   r!   r!   r!   r"   r   "   s   1
r   c                   @   s"   e Zd ZdZdddZdd ZdS )	_LanguageSpecificStemmera  
    This helper subclass offers the possibility
    to invoke a specific stemmer directly.
    This is useful if you already know the language to be stemmed at runtime.

    Create an instance of the Snowball stemmer.

    :param ignore_stopwords: If set to True, stopwords are
                             not stemmed and returned unchanged.
                             Set to False by default.
    :type ignore_stopwords: bool
    Fc              
   C   s   t | j }|dr$|d d }t | _|rz t|D ]}| j| q<W n2 tk
r } zt	d
| |W 5 d }~X Y nX d S )Nr   izH{!r} has no list of stopwords. Please set 'ignore_stopwords' to 'False'.)typer'   lowerendswithsetr   wordsaddOSErrorr   format)r   r    r   worder!   r!   r"   r#      s     
z!_LanguageSpecificStemmer.__init__c                 C   s   dt | j dS )zO
        Print out the string representation of the respective class.

        <>)r,   r'   )r   r!   r!   r"   __repr__   s    z!_LanguageSpecificStemmer.__repr__N)F)r'   r(   r)   r*   r#   r8   r!   r!   r!   r"   r+   t   s   
r+   c                   @   s   e Zd ZdZdddZdS )PorterStemmeraF  
    A word stemmer based on the original Porter stemming algorithm.

        Porter, M. "An algorithm for suffix stripping."
        Program 14.3 (1980): 130-137.

    A few minor modifications have been made to Porter's basic
    algorithm.  See the source code of the module
    nltk.stem.porter for more information.

    Fc                 C   s   t | | tj|  d S r$   )r+   r#   r   r9   )r   r    r!   r!   r"   r#      s    zPorterStemmer.__init__N)F)r'   r(   r)   r*   r#   r!   r!   r!   r"   r9      s   r9   c                   @   s   e Zd ZdZdd ZdS )_ScandinavianStemmerz
    This subclass encapsulates a method for defining the string region R1.
    It is used by the Danish, Norwegian, and Swedish stemmer.

    c                 C   s   d}t dt|D ]}|| |kr||d  |krdt|d|d    krVdkrhn n|dd }n2t|d|d  dkr||d d }n|  S  qq|S )ar  
        Return the region R1 that is used by the Scandinavian stemmers.

        R1 is the region after the first non-vowel following a vowel,
        or is the null region at the end of the word if there is no
        such non-vowel. But then R1 is adjusted so that the region
        before it contains at least three letters.

        :param word: The word whose region R1 is determined.
        :type word: str or unicode
        :param vowels: The vowels of the respective language that are
                       used to determine the region R1.
        :type vowels: unicode
        :return: the region R1 for the respective word.
        :rtype: unicode
        :note: This helper method is invoked by the respective stem method of
               the subclasses DanishStemmer, NorwegianStemmer, and
               SwedishStemmer. It is not to be invoked directly!

               Nr   rangelen)r   r4   vowelsr1ir!   r!   r"   _r1_scandinavian   s    (z%_ScandinavianStemmer._r1_scandinavianN)r'   r(   r)   r*   rD   r!   r!   r!   r"   r:      s   r:   c                   @   s    e Zd ZdZdd Zdd ZdS )_StandardStemmerz~
    This subclass encapsulates two methods for defining the standard versions
    of the string regions R1, R2, and RV.

    c                 C   s   d}d}t dt|D ]4}|| |kr||d  |kr||d d } qLqt dt|D ]4}|| |krZ||d  |krZ||d d } qqZ||fS )a  
        Return the standard interpretations of the string regions R1 and R2.

        R1 is the region after the first non-vowel following a vowel,
        or is the null region at the end of the word if there is no
        such non-vowel.

        R2 is the region after the first non-vowel following a vowel
        in R1, or is the null region at the end of the word if there
        is no such non-vowel.

        :param word: The word whose regions R1 and R2 are determined.
        :type word: str or unicode
        :param vowels: The vowels of the respective language that are
                       used to determine the regions R1 and R2.
        :type vowels: unicode
        :return: (r1,r2), the regions R1 and R2 for the respective word.
        :rtype: tuple
        :note: This helper method is invoked by the respective stem method of
               the subclasses DutchStemmer, FinnishStemmer,
               FrenchStemmer, GermanStemmer, ItalianStemmer,
               PortugueseStemmer, RomanianStemmer, and SpanishStemmer.
               It is not to be invoked directly!
        :note: A detailed description of how to define R1 and R2
               can be found at http://snowball.tartarus.org/texts/r1r2.html

        r;   r<   Nr>   )r   r4   rA   rB   r2rC   r!   r!   r"   _r1r2_standard   s    z_StandardStemmer._r1r2_standardc                 C   s   d}t |dkr|d |krRtdt |D ]$}|| |kr*||d d } qq*nZ|d |kr|d |krtdt |D ]$}|| |krx||d d } qqxn|dd }|S )a  
        Return the standard interpretation of the string region RV.

        If the second letter is a consonant, RV is the region after the
        next following vowel. If the first two letters are vowels, RV is
        the region after the next following consonant. Otherwise, RV is
        the region after the third letter.

        :param word: The word whose region RV is determined.
        :type word: str or unicode
        :param vowels: The vowels of the respective language that are
                       used to determine the region RV.
        :type vowels: unicode
        :return: the region RV for the respective word.
        :rtype: unicode
        :note: This helper method is invoked by the respective stem method of
               the subclasses ItalianStemmer, PortugueseStemmer,
               RomanianStemmer, and SpanishStemmer. It is not to be
               invoked directly!

        r;      r<   Nr   r=   )r@   r?   r   r4   rA   rvrC   r!   r!   r"   _rv_standard  s    z_StandardStemmer._rv_standardN)r'   r(   r)   r*   rG   rK   r!   r!   r!   r"   rE      s   *rE   c                   @   s  e Zd ZdZedZedZedZdZ	edZ
edZedZed	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ dZ!dZ"dZ#d Z$d!Z%d"Z&d#Z'd$Z(d%Z)d&Z*d'Z+d(Z,d)Z-d*Z.d+Z/d,Z0d-Z1d-Z2d.Z3d.Z4d.Z5d.Z6d.Z7d.Z8d.Z9d.Z:d.Z;d.Z<d.Z=d.Z>d/d0 Z?d1d2 Z@d3d4 ZAd5d6 ZBd7d8 ZCd9d: ZDd;d< ZEd=d> ZFd?d@ ZGdAdB ZHdCdD ZIdEdF ZJdGdH ZKdIdJ ZLdKdL ZMdMdN ZNdOdP ZOdQdR ZPdSdT ZQdUdV ZRdWdX ZSdYdZ ZTd[d\ ZUd]d^ ZVd_S )`ArabicStemmera  
    https://github.com/snowballstem/snowball/blob/master/algorithms/arabic/stem_Unicode.sbl (Original Algorithm)
    The Snowball Arabic light Stemmer
    Algorithm:

    - Assem Chelli
    - Abdelkrim Aries
    - Lakhdar Benzahia

    NLTK Version Author:

    - Lakhdar Benzahia
    z9[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]z[\u0640]z[\u060C-\u061B-\u061F])   أ   إ   آ   ؤu   ئz^[\u0622\u0623\u0625]z[\u0624]z[\u0626]z[\u0623\u0622\u0625])   كال   بال   ال   لل)   ة   ات)
   ي   ك   ه   نا   كم   ها   هن   هم   كما   هما   ن)   اrW      وrV      تrU   rW   )rY   rX      نيrZ   r\   r^   r]   r[      كنr`   r_      كمو)rd   rb   ra   rW   rZ      تا   تن   ان   ون   ين   تما)   واu   تم)rc      تموu   ى)rM      أأ   أآ   أؤ   أا   أإ)u   فالu   وال)u   فrc   )rS   rT   rQ   rR   )   بrX      ل   بب   كك)u   سيu   ستu   سنu   سأ)u   يستu   نستu   تست)rY   rX   )re   rZ   r\   r^   r]   r[   rf   )r`   r_   rg   )rb   ra   rW   )rZ   rh   ri   )rj   rk   rl   )rW   rX   rY   )rZ   r[   r\   r]   r^   )r_   r`   )rn   u   فا)rQ   rR   )rS   rT   )rX   rv   )rw   rx   TFc                 C   s.   | j d|}| jd|}| jd|}|S )zT
        :param token: string
        :return: normalized token type string
        r;   )_ArabicStemmer__vocalizationsub_ArabicStemmer__kasheeda(_ArabicStemmer__arabic_punctuation_marksr%   r!   r!   r"   Z__normalize_pre  s    zArabicStemmer.__normalize_prec                 C   sb   | j D ]}||rt||d} q&q| jd|}| jd|}| jd|}| jd|}|S )Nu   ءrb   rc   rW   )_ArabicStemmer__last_hamzatr.   r   _ArabicStemmer__initial_hamzatrz   _ArabicStemmer__waw_hamza_ArabicStemmer__yeh_hamza_ArabicStemmer__alefat)r   r&   Zhamzar!   r!   r"   Z__normalize_post!  s    

zArabicStemmer.__normalize_postc                 C   sr   | j D ]f}||r|| jkr@t|dkr@d| _d| _d| _ qn|| jkrt|dkrd| _d| _d| _ qnqd S )N   TFr=   )_ArabicStemmer__checks1
startswith_ArabicStemmer__articles_3lenr@   is_nounis_verb
is_defined_ArabicStemmer__articles_2lenr   r&   prefixr!   r!   r"   Z
__checks_1.  s    

zArabicStemmer.__checks_1c                 C   sb   | j D ]V}||r|dkr8t|dkr8d| _d| _ q^|dkrt|dkrd| _d| _ q^qd S )NrU   rH   TFrV   r=   )_ArabicStemmer__checks2r.   r@   r   r   r   r&   suffixr!   r!   r"   Z
__checks_2=  s    

zArabicStemmer.__checks_2c                 C   s   | j D ]}||r|| jkr@t|dkr@|d d }d| _ q|| jkrlt|dkrl|d d }d| _ q|| jkrt|dkr|d d }d| _ qq|S Nr   T      )!_ArabicStemmer__suffix_verb_step1r.   )_ArabicStemmer__conjugation_suffix_verb_1r@   suffixes_verb_step1_success)_ArabicStemmer__conjugation_suffix_verb_2)_ArabicStemmer__conjugation_suffix_verb_3r   r!   r!   r"   Z__Suffix_Verb_Step1J  s    

z!ArabicStemmer.__Suffix_Verb_Step1c                 C   s   | j D ]}||rt|dkr|dkrJt|dkrJ|d d }d| _ q|| jkrvt|dkrv|d d }d| _ q|| jkrt|dkr|d d }d| _ q|| jkrt|dkr|d d }d| _ q|dkrt|d	kr|d d
 }d| _ qq|S )Nr=   rd   r   r   Tr   r   rm   r   r   )"_ArabicStemmer__suffix_verb_step2ar.   r@   suffix_verb_step2a_success)_ArabicStemmer__conjugation_suffix_verb_4,_ArabicStemmer__conjugation_suffix_verb_past/_ArabicStemmer__conjugation_suffix_verb_presentr   r!   r!   r"   Z__Suffix_Verb_Step2a]  s.    
z"ArabicStemmer.__Suffix_Verb_Step2ac                 C   sb   | j D ]V}||r|dkr8t|dkr8|d d } q^|dkrt|dkr|d d } q^q|S )Nro   r   r   rc   r   r   )"_ArabicStemmer__suffix_verb_step2cr.   r@   r   r!   r!   r"   Z__Suffix_Verb_Step2cz  s    

z"ArabicStemmer.__Suffix_Verb_Step2cc                 C   s<   | j D ]0}||rt|dkr|d d }d| _ q8q|S Nr   r   T)"_ArabicStemmer__suffix_verb_step2br.   r@   suffix_verb_step2b_successr   r!   r!   r"   Z__Suffix_Verb_Step2b  s    
z"ArabicStemmer.__Suffix_Verb_Step2bc                 C   s<   | j D ]0}||rt|dkr|d d }d| _ q8q|S )Nr=   r   T)#_ArabicStemmer__suffix_noun_step2c2r.   r@   suffix_noun_step2c2_successr   r!   r!   r"   Z__Suffix_Noun_Step2c2  s    
z#ArabicStemmer.__Suffix_Noun_Step2c2c                 C   s   | j D ]}||r|| jkr@t|dkr@|d d }d| _ q|| jkrlt|dkrl|d d }d| _ q|| jkrt|dkr|d d }d| _ qq|S r   )"_ArabicStemmer__suffix_noun_step1ar.   )_ArabicStemmer__conjugation_suffix_noun_1r@   suffix_noun_step1a_success)_ArabicStemmer__conjugation_suffix_noun_2)_ArabicStemmer__conjugation_suffix_noun_3r   r!   r!   r"   Z__Suffix_Noun_Step1a  s    

z"ArabicStemmer.__Suffix_Noun_Step1ac                 C   s<   | j D ]0}||rt|dkr|d d }d| _ q8q|S )Nr   r   T)"_ArabicStemmer__suffix_noun_step2ar.   r@   suffix_noun_step2a_successr   r!   r!   r"   Z__Suffix_Noun_Step2a  s    
z"ArabicStemmer.__Suffix_Noun_Step2ac                 C   s<   | j D ]0}||rt|dkr|d d }d| _ q8q|S r   )"_ArabicStemmer__suffix_noun_step2br.   r@   suffix_noun_step2b_successr   r!   r!   r"   Z__Suffix_Noun_Step2b  s    
z"ArabicStemmer.__Suffix_Noun_Step2bc                 C   s6   | j D ]*}||rt|dkr|d d } q2q|S )Nr   r   )#_ArabicStemmer__suffix_noun_step2c1r.   r@   r   r!   r!   r"   Z__Suffix_Noun_Step2c1  s
    
z#ArabicStemmer.__Suffix_Noun_Step2c1c                 C   s<   | j D ]0}||rt|dkr|d d }d| _ q8q|S )Nr   r   T)"_ArabicStemmer__suffix_noun_step1br.   r@   suffixe_noun_step1b_successr   r!   r!   r"   Z__Suffix_Noun_Step1b  s    
z"ArabicStemmer.__Suffix_Noun_Step1bc                 C   s6   | j D ]*}||rt|dkr|d d } q2q|S )Nr=   r   )!_ArabicStemmer__suffix_noun_step3r.   r@   r   r!   r!   r"   Z__Suffix_Noun_Step3  s
    
z!ArabicStemmer.__Suffix_Noun_Step3c                 C   s&   | j D ]}||rt||d}q|S )NrW   )'_ArabicStemmer__suffix_all_alef_maqsurar.   r   r   r!   r!   r"   Z__Suffix_All_alef_maqsura  s    

z'ArabicStemmer.__Suffix_All_alef_maqsurac                 C   s   | j D ]}||rt|dkr|dkr:t||d} qq|dkrTt||d} qq|dkrnt||d} qq|dkrt||d	} qq|d
krt||d} qq|S )Nr=   rp   rM   rq   rO   rr   rP   rs   rb   rt   rN   )_ArabicStemmer__prefix_step1r   r@   r   r   r!   r!   r"   Z__Prefix_Step1  s$    
zArabicStemmer.__Prefix_Step1c                 C   s@   | j D ]4}||rt|dkr|t|d  }d| _ q<q|S )Nr   T)_ArabicStemmer__prefix_step2ar   r@   prefix_step2a_successr   r!   r!   r"   Z__Prefix_Step2a  s    
zArabicStemmer.__Prefix_Step2ac                 C   sL   | j D ]@}||rt|dkr|d d | jkr|t|d  } qHq|S )Nr=   rH   )_ArabicStemmer__prefix_step2br   r@   _ArabicStemmer__prefixes1r   r!   r!   r"   Z__Prefix_Step2b  s    
zArabicStemmer.__Prefix_Step2bc                 C   st   | j D ]h}||r|| jkrDt|dkrD|t|d  }d| _ qp|| jkrt|dkr|t|d  } qpq|S )Nr   Tr   )"_ArabicStemmer__prefix_step3a_nounr   r   r@   prefix_step3a_noun_successr   r   r!   r!   r"   Z__Prefix_Step3a_Noun  s    

z"ArabicStemmer.__Prefix_Step3a_Nounc                 C   s   | j D ]}||rt|dkrf|dkrB|t|d  }d| _ q|| jkrft|||d }d| _ q|| jkrt|dkr|t|d  }d| _ qq|S )Nr=   ru   Tr<   r   )"_ArabicStemmer__prefix_step3b_nounr   r@   prefix_step3b_noun_success_ArabicStemmer__prepositions2r   _ArabicStemmer__prepositions1r   r!   r!   r"   Z__Prefix_Step3b_Noun	  s     


z"ArabicStemmer.__Prefix_Step3b_Nounc                 C   s:   | j D ].}||rt|dkrt|||d } q6q|S )Nr   r<   )!_ArabicStemmer__prefix_step3_verbr   r@   r   r   r!   r!   r"   Z__Prefix_Step3_Verb  s
    
z!ArabicStemmer.__Prefix_Step3_Verbc                 C   sB   | j D ]6}||rt|dkrt||d}d| _d| _ q>q|S )Nr   u   استTF)!_ArabicStemmer__prefix_step4_verbr   r@   r   r   r   r   r!   r!   r"   Z__Prefix_Step4_Verb$  s    
z!ArabicStemmer.__Prefix_Step4_Verbc                 C   sF  d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _|}| | | | | |}|| jkst|dkr|S | j r| |}| jr| |}| js| |}n| |}| js| |}| jr| |}| js| js:| |}| |}| js| |}| js| js| |}nf| |}| j	r| |}| jsh| |}| js| js| |}n| js| |}| |}| |}| js| j r| |}| |}|  |}| j
s| !|}| "|}| js| jr| #|}n$| js4| j r4| $|}| %|}| &|}|}|S )zw
        Stem an Arabic word and return the stemmed form.

        :param word: string
        :return: string
        TFrH   )'r   r   r   r   r   r   r   r   r   r   r   r   r   _ArabicStemmer__checks_1_ArabicStemmer__checks_2_ArabicStemmer__normalize_prer   r@   !_ArabicStemmer__Suffix_Verb_Step1r   "_ArabicStemmer__Suffix_Verb_Step2a"_ArabicStemmer__Suffix_Verb_Step2c"_ArabicStemmer__Suffix_Verb_Step2b#_ArabicStemmer__Suffix_Noun_Step2c2"_ArabicStemmer__Suffix_Noun_Step1a"_ArabicStemmer__Suffix_Noun_Step2a"_ArabicStemmer__Suffix_Noun_Step2b#_ArabicStemmer__Suffix_Noun_Step2c1"_ArabicStemmer__Suffix_Noun_Step1b!_ArabicStemmer__Suffix_Noun_Step3'_ArabicStemmer__Suffix_All_alef_maqsura_ArabicStemmer__Prefix_Step1_ArabicStemmer__Prefix_Step2a_ArabicStemmer__Prefix_Step2b"_ArabicStemmer__Prefix_Step3a_Noun"_ArabicStemmer__Prefix_Step3b_Noun!_ArabicStemmer__Prefix_Step3_Verb!_ArabicStemmer__Prefix_Step4_Verb_ArabicStemmer__normalize_post)r   r4   Zmodified_wordZstemmed_wordr!   r!   r"   r   -  s    
























zArabicStemmer.stemN)Wr'   r(   r)   r*   recompilery   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   r!   r!   r"   rL   4  s   





			rL   c                   @   s4   e Zd ZdZdZdZdZdZdZdZ	dZ
d	d
 ZdS )DanishStemmera  
    The Danish Snowball stemmer.

    :cvar __vowels: The Danish vowels.
    :type __vowels: unicode
    :cvar __consonants: The Danish consonants.
    :type __consonants: unicode
    :cvar __double_consonants: The Danish double consonants.
    :type __double_consonants: tuple
    :cvar __s_ending: Letters that may directly appear before a word final 's'.
    :type __s_ending: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the Danish
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/danish/stemmer.html

       aeiouyæåøbcdfghjklmnpqrstvwxzbbccddffgghhjjkkllmmnnppZqqrrssttvvZwwZxxzzu   abcdfghjklmnoprtvyzå) ZerendesZerendeZhedensZethedZeredehedenZhederendesZernesZerensZeretsZeredendeZerneZerenZererZhedseneseresZeretZhedeneereensersetseneresetr5   s)gddtgtkt)elig   løstligelsigc                 C   s  |  }|| jkr|S | || j}| jD ]f}||r*|dkrh|d | jkr|dd }|dd }n$|dt|  }|dt|  } qq*| jD ]*}||r|dd }|dd } qq|dr|dd }|dd }| j	D ]}||r|dkr|dd }|dd }nJ|dt|  }|dt|  }|| jrh|dd }|dd } qpq| j
D ]2}||rvt|dkrv|dd } qqv|S )z
        Stem a Danish word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        r   r   Nr   Zigstr  r=   )r-   r   rD   _DanishStemmer__vowels_DanishStemmer__step1_suffixesr.   _DanishStemmer__s_endingr@   _DanishStemmer__step2_suffixes_DanishStemmer__step3_suffixes!_DanishStemmer__double_consonants)r   r4   rB   r   double_consr!   r!   r"   r     sJ    










zDanishStemmer.stemN)r'   r(   r)   r*   r  Z_DanishStemmer__consonantsr  r  r  r	  r
  r   r!   r!   r!   r"   r     s   "r   c                   @   s$   e Zd ZdZdZdZdZdd ZdS )DutchStemmera  
    The Dutch Snowball stemmer.

    :cvar __vowels: The Dutch vowels.
    :type __vowels: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step3b_suffixes: Suffixes to be deleted in step 3b of the algorithm.
    :type __step3b_suffixes: tuple
    :note: A detailed description of the Dutch
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/dutch/stemmer.html

    u   aeiouyè)r   r   r   ser   )baarlijkbarendingr  c                 C   s  |  }|| jkr|S d}|dddddddddd	d
d	dddddddd}|drdd|dd f}tdt|D ]F}||d  | jkr|| dkrd|d| d||d d f}qtdt|d D ]X}||d  | jkr|| d	kr||d  | jkrd|d| d||d d f}q| || j\}}tdt|D ]}|| | jkrp||d  | jkrpdt|d|d    krdkrn n|dd }n"t|d|d  dkr|  S  qqp| j	D ]}|
|r|dkrTt||d}t||d}|
drt||d}n,|dkr|
ds|t| d  | jkr|t| d t|  dkr|dt|  }|dt|  }|dt|  }|
dr|dd }|dd }|dd }nr|dkr|t| d  | jkr|t| d  d kr|dt|  }|dt|  }|dt|  } qq|
dr|d! | jkrd"}|dd }|dd }|dd }|
dr|dd }|dd }|dd }|
dr|d# d$kr|dd% }|dd% }|dd% }|
d&r|d' | jkr|d#d! dkr|dd! }|dd! }|dd! }|
dr|dd }|dd }|dd }| jD ]:}|
|r|d(kr:|dd' }|dd' }|
d)r |d' dkr |dd! }n|
dr|dd }n|d)kr`|d' dkr`|dd! }n|d*kr|dd% }|dd% }|
dr|d! | jkr|dd }|
dr|dd }n4|d+kr|dd% }n|d,kr|r|dd' } qqt|d-krp|d | jkrp|d dkrp|d'd d.krp|d% | jkrpd|dd' |d' |d f}|dd	dd}|S )/z
        Stem a Dutch word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        F   äa   á   ër5      é   írC      ï   öo   ó   üu   úyr;   Yr<   NIr=   r   r   Zheid)r   r   gem)r   r   r   r   )r  r   jr   Tcr   r   )r  r  r  r  r  r  r   )aaeeoouu)r-   r   replacer   joinr?   r@   _DutchStemmer__vowelsrG   _DutchStemmer__step1_suffixesr.   r   _DutchStemmer__step3b_suffixes)r   r4   step2_successrC   rB   rF   r   r!   r!   r"   r   G  s   

        	 
&
&$,






 zDutchStemmer.stemN)r'   r(   r)   r*   r/  r0  r1  r   r!   r!   r!   r"   r  2  s
   r  c                )   @   s   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZddddddddddddddddddddddddd d d!d!d"d"d"d"d#d#d#d#d$d$d$d$d%(Zd&d' Zd(S ))EnglishStemmera  
    The English Snowball stemmer.

    :cvar __vowels: The English vowels.
    :type __vowels: unicode
    :cvar __double_consonants: The English double consonants.
    :type __double_consonants: tuple
    :cvar __li_ending: Letters that may directly appear before a word final 'li'.
    :type __li_ending: unicode
    :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm.
    :type __step0_suffixes: tuple
    :cvar __step1a_suffixes: Suffixes to be deleted in step 1a of the algorithm.
    :type __step1a_suffixes: tuple
    :cvar __step1b_suffixes: Suffixes to be deleted in step 1b of the algorithm.
    :type __step1b_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm.
    :type __step4_suffixes: tuple
    :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm.
    :type __step5_suffixes: tuple
    :cvar __special_words: A dictionary containing words
                           which have to be stemmed specially.
    :type __special_words: dict
    :note: A detailed description of the English
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/english/stemmer.html
    Zaeiouy)	r   r   r   r   r   r   r   r   r   Z
cdeghkmnrt)z's'z's')ssesiediesusr   r   )eedlyZinglyZedlyeedr  ed)izationationalfulnessousnessivenesstionalbilitilesslientliationalismalitiousliivitifulliencianciabliizeratorallibliogili)	r=  rA  alizeicateicitiativeicalnessful)ementanceenceableiblementantentismateitiousiveizeionalr   ic)r5   lskiskyZdieZlieZtieidlZgentlZugliZearliZonliZsinglnewshoweatlascosmosbiasandesinningoutingcanningherringearringproceedexceedsucceed)(skisZskiesZdyingZlyingZtyingZidlyZgentlyZuglyZearlyonlyZsinglyrn  rp  rq  rr  rs  rt  ru  rv  Zinningsrw  Zoutingsrx  Zcanningsry  Zherringsrz  Zearringsr{  ZproceedsZ	proceededZ
proceedingr|  ZexceedsZexceededZ	exceedingr}  ZsucceedsZ	succeededZ
succeedingc           	      C   sB  |  }|| jkst|dkr"|S || jkr6| j| S |dddddd}|drh|dd }|drd	d
|dd f}tdt|D ]F}||d  | jkr|| dkrd	|d| d
||d d f}qd}d}d	}d	}|drr|dr|dd }n|dd }tdt|D ]@}|| | jkr.||d  | jkr.||d d } qq.n| 	|| j\}}| j
D ]N}||r|dt|  }|dt|  }|dt|  } qڐq| jD ]}||r|dkr"|dd }|dd }|dd }n|dkrt|dt|  dkrn|dd }|dd }|dd }n$|dd }|dd }|dd }n^|dkr|dd D ]}|| jkrd} qȐq|r|dd }|dd }|dd } qq| jD ]P}||r|dkr||rLt||d}t|t|krVt||d}nd	}t|t|krzt||d}nd	}n|dt|  D ]}|| jkrd} qq|rL|dt|  }|dt|  }|dt|  }|drDd	|df}d	|df}t|dks2t|dkrLd	|df}n|| jrx|dd }|dd }|dd }n|d	krt|dkr|d | jkr|d dkr|d | jkr|d | jks|d	krLt|dkrL|d | jkrL|d | jkrLd	|df}t|dkr0d	|df}t|dkrLd	|df} qVqt|dkr|d dkr|d | jkrd	|dd df}t|dkrd	|dd df}nd	}t|dkrd	|dd df}nd	}| jD ](}||r||
r|d krB|dd }|dd }|dd }n|d!krd	|dd df}t|dkrd	|dd df}nd	}t|dkrd	|dd df}nd	}nZ|d"kr|dd }|dd }|dd }n(|d#krNt||d$}t|t|kr"t||d$}nd	}t|t|krFt||d$}nd	}n|d%krt||d&}t|t|krt||d&}nd	}t|t|krt||d&}nd}nd|d'krt||d(}t|t|krt||d(}nd	}t|t|kr
t||d(}nd	}n|d)krD|dd* }|dd* }|dd* }n|d+krt||d,}t|t|krzt||d,}nd	}t|t|krt||d,}nd	}nn|d-k	rt||d.}t|t|krt||d.}nd	}t|t|k	r t||d.}nd}n|d/k	rht||d0}t|t|k	r>t||d0}nd	}t|t|k	rbt||d0}nd	}n|d1k	r|d* d2k	r|dd }|dd }|dd }nn|d3k	r|dd }|dd }|dd }n>|d4k
r|d | jk
r|dd }|dd }|dd } 
qq| jD ]}||
r$||r|d k
rt|dd }|dd }|dd }nn|d5k
rt||d&}t|t|k
rt||d&}nd	}t|t|k
rt||d&}nd	}n|d6kr|dd }|dd }|dd }n|d7krft||d8}t|t|kr<t||d8}nd	}t|t|kr`t||d8}nd	}n||d9kr|dt|  }|dt|  }|dt|  }n:|d:kr||r|dd; }|dd; }|dd; } q
q$| jD ]}||r||r|d<krL|d* d=kr|dd }|dd }|dd }n6|dt|  }|dt|  }|dt|  } qq|d2r|d d2kr|dd }n~|dr|dd }nd|dr2t|d>kr2|d | jks&|d dks&|d | jks&|d* | jkr2|dd }|d
d}|S )?z
        Stem an English word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        rH   u   ’r4  u   ‘u   ‛r<   Nr!  r;   r"  F)generZcommunarsen)r  r  r   r   r5  r   )r6  r7  r   r   T)r:  r9  r*  )atblZizr5   r=   ZwxYr   r   ZyYrC   rA  )rK  rL  rM  rD  )rN  r<  rh  )r=  rE  rO  rd  )rF  rG  rP  rj  r>  r(  )rH  r?  rf  )r@  rI  rg  )rB  rQ  ZblerR  rl  )rJ  rC  rS  r=  rT  )rU  rV  rX  rk  )rZ  rY  rW  r&  ri  str   )r-   r   r@   _EnglishStemmer__special_wordsr-  r   r.  r?   _EnglishStemmer__vowelsrG   _EnglishStemmer__step0_suffixesr.    _EnglishStemmer__step1a_suffixes _EnglishStemmer__step1b_suffixesr   "_EnglishStemmer__double_consonants_EnglishStemmer__step2_suffixes_EnglishStemmer__li_ending_EnglishStemmer__step3_suffixes_EnglishStemmer__step4_suffixes)	r   r4   rC   Zstep1a_vowel_foundZstep1b_vowel_foundrB   rF   r   letterr!   r!   r"   r   ~  s$   

  

&$










	


,





















zEnglishStemmer.stemN)r'   r(   r)   r*   r  r  r  r  r  r  r  r  r  Z_EnglishStemmer__step5_suffixesr  r   r!   r!   r!   r"   r3    sj   +r3  c                   @   s<   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
Zdd ZdS )FinnishStemmeraS  
    The Finnish Snowball stemmer.

    :cvar __vowels: The Finnish vowels.
    :type __vowels: unicode
    :cvar __restricted_vowels: A subset of the Finnish vowels.
    :type __restricted_vowels: unicode
    :cvar __long_vowels: The Finnish vowels in their long forms.
    :type __long_vowels: tuple
    :cvar __consonants: The Finnish consonants.
    :type __consonants: unicode
    :cvar __double_consonants: The Finnish double consonants.
    :type __double_consonants: tuple
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm.
    :type __step4_suffixes: tuple
    :note: A detailed description of the Finnish
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/finnish/stemmer.html
    u
   aeiouyäöu	   aeiouäö)r)  r*  iir+  r,  u   ääu   öör   r   )
Zkaanu   käänstiZkinhan   hänkou   köpau   pä)	Znsau   nsäZmmeZnnesinian   änr   )siinttenseenr  henhinhonr     höndentta   ttässa   ssästa   ställa   llälta   ltälleksiineta   täna   när  r  n)ZimpiZimpau   impäZimmiimmau   immämpimpa   mpämmimma   mmäZejau   ejäc                 C   s  |  }|| jkr|S d}| || j\}}| jD ]}||r2|dkrv||kr|dd }|dd }|dd }nL|t| d  dkr|dt|  }|dt|  }|dt|  } qq2| jD ]}||r|dkr|d dkr|dd	 }|dd	 }|dd	 }n|d
kr|dd	 }|dd	 }|dd	 }|drdt|dd}|dr|t|dd}|drt|dd}n|dkr|dd	 dks|dd	 dkr|dd	 }|dd	 }|dd	 }n|dkr@|dd	 dks|dd	 dkr|dd	 }|dd	 }|dd	 }nf|dkr|dd	 dkr|dd	 }|dd	 }|dd	 }n$|dd }|dd }|dd } qq| j	D ]}||r|dkr|dkr|d dks`|dkr |d dks`|dkr|d dks`|dkr0|d dks`|d krH|d d!ks`|d"kr|d d#kr|dd }|dd }|dd }d$}n|d%kr|t| d  dkr|t| d&  | j
kr|dt|  }|dt|  }|dt|  }d$}nqn|d'kr\|d(d | jkr|dd }|dd }|dd }d$}nqnN|d)kr|d	 | jkr|d | jkr|dd* }|dd* }|dd* }d$}n|d+kr|d dkr|dd }|dd }|dd }d$}n|d,krp|dd* }|dd* }|dd* }d$}|d	d d-ksJ|d	d | jkr|dd* }|dd* }|dd* }n:|dt|  }|dt|  }|dt|  }d$} qq| jD ]}||r|d.kr|dd d/krB|dd }|dd }|dd }n6|dt|  }|dt|  }|dt|  } qLq|rt|dkr|d* d0kr|dd* }|dd* }n|s:t|d&kr:|d* d1kr:|d	 | jkr:|dd* }|dd* }|dd* }|d2r|dd }|dd }n6|d3r:|dd d/kr:|dd }|dd }|d	d | jkrf|dd* }|dd* }t|d&kr|d	 | jkr|d* d4kr|dd* }|dd* }|d5r|dd* }|dd* }|d6r|dd* }|dd* }tdt|D ]}||  | jkrq n~|dkrN|| d d | jkr|dd* }nF|| d | d  | jkrd7|d|  || d d f} qq |S )8z
        Stem a Finnish word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        Fr  Nr   r<   u   ntaeiouyäör  kr   r  Zkser  r  r(  )r  r  r&  )r  r  r  r  r  )r  r  )r  r  r  r  r   )r  r  )r  r  r  r  r  r  r  r  r  r5   r  rC   r  r  r  r  r  r  T)r  r  r  rH   r  )r  r  r   )r  r  r  ie)r  r  r  r  r  r  poZijtr  r  u   aäei)ZojZujZjor;   )r-   r   rG   _FinnishStemmer__vowels_FinnishStemmer__step1_suffixesr.   r@   _FinnishStemmer__step2_suffixesr   _FinnishStemmer__step3_suffixes"_FinnishStemmer__restricted_vowels_FinnishStemmer__long_vowels_FinnishStemmer__consonants_FinnishStemmer__step4_suffixesr?   "_FinnishStemmer__double_consonantsr.  )r   r4   Zstep3_successrB   rF   r   rC   r!   r!   r"   r   o  sh   







$
$










 

&



"

,
 &
zFinnishStemmer.stemN)r'   r(   r)   r*   r  r  r  r  r  r  r  r  r  r   r!   r!   r!   r"   r    s    r  c                   @   s4   e Zd ZdZdZdZdZdZdZdd Z	d	d
 Z
dS )FrenchStemmera  
    The French Snowball stemmer.

    :cvar __vowels: The French vowels.
    :type __vowels: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm.
    :type __step2a_suffixes: tuple
    :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm.
    :type __step2b_suffixes: tuple
    :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm.
    :type __step4_suffixes: tuple
    :note: A detailed description of the French
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/french/stemmer.html
    u   aeiouyâàëéêèïîôûù)+	issementsissementatricesatriceateursationslogiesusionsutionsementsammentemmentancesiqUesismesablesistesateurrE  logieusionutionencesr[  eusesmentsr\  iqUeismer^  ister]     itésiveseauxeuser`  eux   itérg  ifsauxif)#ZissaIentZissantesZiraIentZissanteZissantsZissionsZirionsZissaisZissaitZissantZissentZissiezZissonsZiraisZiraitZirentZiriezZironsZirontissesZissezu   îmesu   îtesiraiirasZirezisser7  irau   îtr  irisitrC   )&eraIentassionserionsassentassiez   èrenteraiseraiteriezeronserontaIentantesassesionseraieraserez   âmes   âtesanteantsasse   éeseraiezaisaitra     ée   ésr   ez   âtaiasr  r  )   ière   Ièreri  ierIerr5   r  c                 C   s<
  |  }|| jkr|S d}d}d}d}tdt|D ]D}||d  dkr4|| dkr4d|d| d||d d f}q4tdt|d D ]}||d  | jkr||d  | jkr|| dkrd|d| d||d d f}n2|| dkrd|d| d	||d d f}||d  | jksB||d  | jkr|| d
krd|d| d||d d f}q| || j\}}| || j}	| jD ]v}
|	|
r|
dkr|dd }d}n@|
dkr|
|kr|dt|
  }d}n|
|kr
t
||
d}d}n|
dkrP|
|	krP|dt|
  }d}|dd dkrd|kr|dd }|dd dkrLd|krL|dd }n|dd dkrd|kr|dd }n d|krLd|dd df}np|dd dkrd|ksd|krL|dd }n<|dd dkr
d|	ks6d|	kr
d|dd df}n|
dkr|
|	krt
|dd}t
|	dd}	d}n|
d kr|
|	krt
|d d!}d}n^|
d"kr|
|	kr|	|
s|	|	|
d  | jkr|dt|
  }|	dt|
  }	d}n|
d#krD|
|krDd|dd d$f}d}n|
d%kr|
|kr|t|
 d  | jkr|dt|
  }d}n~|
d&kr|
|kr|dt|
  }d}nP|
d'kr(|
|kr(|dt|
  }d}|dd d(kr
d(|kr|dd }nd|dd df}n|
d)krP|
|krPt
||
d*}d}n|
d+krx|
|krxt
||
d}d}n|
d,kr|
|krt
||
d!}d}nj|
d-krx|
|krx|dt|
  }d}|d.d d/krd/|kr|dd. }nd|dd d$f}nj|dd d(krNd(|kr6|dd }nd|dd df}n(|dd dkr
d|kr
|dd }n|
d0kr
|
|kr
|dt|
  }d}|dd dkr
d|kr
|dd }|dd d(kr
d(|kr|dd }nd|dd df} qq|r |rD| jD ]d}
|	|
r&|
|	krt|	t|
kr|	|	|
d  | jkr|dt|
  }d} qq&|sD| jD ]}
|		|
r|
d1krd1|kr|dd. }d}nl|
d2kr|dt|
  }d}nJ|
d3kr:|dt|
  }|	dt|
  }	d}|		d4r:|dd } qDq|sV|sV|r|d dkr|d|dd df}n$|d d5k	rd|dd d6f}nt|d7kr|d d8kr|d d9kr|dd }| jD ]}
|	|
r|
|	kr|
d:k	r(|
|k	r(|	d. d;k	r(|dd }nX|
d<k	r@t
||
d}n@|
d4k	rX|dd }n(|
d=k	r|dd d>k	r|dd } 	qq|	d?	r|dd }tdt|D ]j}||  | jk	r|d7 }nH|dk
r||  d@k
rd|d|  d4|| d d f} 
q	q|d	ddddd
}|S )Az
        Stem a French word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        Fr<   qr  r;   NUrC   r#  r!  r"  r  r   T)r  r  r  )r[  r  r   ivr  r   Zeusx)abliqUr!  r"  )   ièr   Ièrr#  r$  r  ra  r  rb  )r`  r  r  rl  )r  r  )r\  r  r  r^  r  r  r  r  r  r  r  )r  r  rE  r  r  r  rk  )r  r  log)r  r  r  r  )r]  r  )r  r  r(  abil)r  rg  r  r  r  )r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r   r  r  )r  r  r  r  r  r  r	  r
  r  r  r  r  r  ra  r  r  r  r  r5      çr'  rH   r   u   aiouèsri  r  )r  r  r  r  r  gu)ZennZonnZettZellZeill)r     è)r-   r   r?   r@   r.  _FrenchStemmer__vowelsrG   _FrenchStemmer__rv_french_FrenchStemmer__step1_suffixesr.   r   r   rindex_FrenchStemmer__step2a_suffixes_FrenchStemmer__step2b_suffixes_FrenchStemmer__step4_suffixesr-  )r   r4   step1_successZrv_ending_foundZstep2a_successZstep2b_successrC   rB   rF   rJ   r   r!   r!   r"   r     s|   

&(&$&&

















*

"



(
zFrenchStemmer.stemc                 C   sx   d}t |dkrt|ds2|d |kr@|d |kr@|dd }n4tdt |D ]$}|| |krN||d d } qtqN|S )a  
        Return the region RV that is used by the French stemmer.

        If the word begins with two vowels, RV is the region after
        the third letter. Otherwise, it is the region after the first
        vowel not at the beginning of the word, or the end of the word
        if these positions cannot be found. (Exceptionally, u'par',
        u'col' or u'tap' at the beginning of a word is also taken to
        define RV as the region to their right.)

        :param word: The French word whose region RV is determined.
        :type word: str or unicode
        :param vowels: The French vowels that are used to determine
                       the region RV.
        :type vowels: unicode
        :return: the region RV for the respective French word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method of
               the subclass FrenchStemmer. It is not to be invoked directly!

        r;   rH   )parcoltapr   r<   r=   N)r@   r   r?   rI   r!   r!   r"   Z__rv_french
  s    


zFrenchStemmer.__rv_frenchN)r'   r(   r)   r*   r*  r,  r.  r/  r0  r   r+  r!   r!   r!   r"   r  R  s   -%(  :r  c                   @   s0   e Zd ZdZdZdZdZdZdZdZ	dd	 Z
d
S )GermanStemmeraD  
    The German Snowball stemmer.

    :cvar __vowels: The German vowels.
    :type __vowels: unicode
    :cvar __s_ending: Letters that may directly appear before a word final 's'.
    :type __s_ending: unicode
    :cvar __st_ending: Letter that may directly appear before a word final 'st'.
    :type __st_ending: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the German
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/german/stemmer.html

    u   aeiouyäöüZbdfghklmnrtZ
bdfghklmnt)ernemr   r   r   r5   r   )Zestr   r   r  )ischlichheitkeitr  ungr  ikc                 C   s8  |  }|| jkr|S |dd}tdt|d D ]}||d  | jkr4||d  | jkr4|| dkrd|d| d||d d f}q4|| dkr4d|d| d	||d d f}q4| || j\}}tdt|D ]}|| | jkr||d  | jkrd
t|d|d    kr,dkr>n n|d
d }n"t|d|d  dkr`|  S  qhq| jD ]}|	|rn|dkr|t| d t|  dkr|dt| d  }|dt| d  }|dt| d  }nv|dkr.|d | j
krd|dd }|dd }|dd }n6|dt|  }|dt|  }|dt|  } qnqn| jD ]}|	|rt|dkr|d | jkrt|dd d
kr|dd }|dd }|dd }n6|dt|  }|dt|  }|dt|  } qqt| jD ]}|	|r |dkrd|t| d t|  krd|t| d
 t| d  kr|dt| d  }n|dt|  }nL|dkrd|t| d t|  kr|dt|  }n
|dkrnd|t| d t|  ksBd|t| d t|  krZ|dt| d  }n|dt|  }n|dkrd|t| d t|  kr|dt| d  }nLd|t| d t|  kr|dt| d  }n|dt|  } qq |ddd d!d"dddd	d}|S )#z
        Stem a German word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

           ßr   r<   r  r;   Nr  r!  r"  r=   r   )r   r   r5   r   Znissr   r   r   r  r   )r  r<  r  rH   r5   )r  r=  r8  )r9  r:  r   r   r;  r9  r  r  r  r  r  )r-   r   r-  r?   r@   _GermanStemmer__vowelsr.  rG   _GermanStemmer__step1_suffixesr.   _GermanStemmer__s_ending_GermanStemmer__step2_suffixes_GermanStemmer__st_ending_GermanStemmer__step3_suffixes)r   r4   rC   rB   rF   r   r!   r!   r"   r   _
  s    

$&& ,




&

"

""
    zGermanStemmer.stemN)r'   r(   r)   r*   r?  rA  rC  r@  rB  rD  r   r!   r!   r!   r"   r5  @
  s   r5  c                   @   sP   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdd Zdd ZdS )HungarianStemmera  
    The Hungarian Snowball stemmer.

    :cvar __vowels: The Hungarian vowels.
    :type __vowels: unicode
    :cvar __digraphs: The Hungarian digraphs.
    :type __digraphs: tuple
    :cvar __double_consonants: The Hungarian double consonants.
    :type __double_consonants: tuple
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm.
    :type __step4_suffixes: tuple
    :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm.
    :type __step5_suffixes: tuple
    :cvar __step6_suffixes: Suffixes to be deleted in step 6 of the algorithm.
    :type __step6_suffixes: tuple
    :cvar __step7_suffixes: Suffixes to be deleted in step 7 of the algorithm.
    :type __step7_suffixes: tuple
    :cvar __step8_suffixes: Suffixes to be deleted in step 8 of the algorithm.
    :type __step8_suffixes: tuple
    :cvar __step9_suffixes: Suffixes to be deleted in step 9 of the algorithm.
    :type __step9_suffixes: tuple
    :note: A detailed description of the Hungarian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/hungarian/stemmer.html

    u   aeiouöüáéíóõúû)csZdzZdzsgyZlynytyZzs)r   r   Zccsr   r   r   Zggyr   r   r   Zllyr   r   Znnyr   r   r   Zsszr   ttyr   r   Zzzs)rj  el),u   képpenu   onkéntu   enkéntu   ankéntu   képpu   kéntZbanZbenZnakZnekvalZvelu   tólu   tõlu   rólu   rõlu   bólu   bõlZhozZhezu   hözu   nálu   nélu   értZkorbaberar   r  r  r   Zotu   ötulu   ülu   váu   vér   onr  u   önr  r  )u   ánkéntu   án   én)Zastulu   estül   ástul   éstülZstulu   stül)r  r  )u   okéu   ökéu   akéu   eké   áké   áéi   éké   ééiu   ké   éi   éér  )   ájuk   éjüku   ünkZunkZjuku   jük   ánk   énkZnkuku   ükr7  omamZodr;  adu   ödjaZje   ám   ád   ém   édmdr  r5   r  r  r  )*ZjaitokZjeitekZjainkZjeinkZaitokZeitek   áitok   éitekZjaimZjeimZjaidZjeidZeinkZainkZitekZjeikZjaik   áink   éinkZaimZeimZaidZeidZjaiZjeiZinkZaikZeik   áim   áid   áik   éim   éid   éikimidr  eir=     áirY  rC   )   ák   éku   ökokZekZakr  c                 C   s  |  }|| jkr|S | || j| j}|| jr| jD ]l}|dt| d |kr:d	|dd |d f}|dt| d |krd	|dd |d f} qq:| j
D ]}||r||rJ|dt|  }|dt|  }|drd	|dd df}t|dd}n.|d	rJd	|dd d
f}t|d	d
} qRq| jD ]T}||rX|dkrt||d
}t||d
}nt||d}t||d} qqX| jD ]}||r|dkrt||d}t||d}nH|dkrt||d
}t||d
}n$|dt|  }|dt|  } q:q| jD ]}||r@| jD ]t}|dt| d |krVd	|dd |d f}|dt| d |krd	|dd |d f} q@qVq@| jD ]}||r|dkr
t||d}t||d}nH|dkr.t||d
}t||d
}n$|dt|  }|dt|  } q\q| jD ]}||rb||r|dkrt||d}t||d}nH|dkrt||d
}t||d
}n$|dt|  }|dt|  } qqb| jD ]}||r||r|dkr:t||d}t||d}nH|dkr^t||d
}t||d
}n$|dt|  }|dt|  } qq| jD ]f}||r||r|dkrt||d}n*|dkrt||d
}n|dt|  } qq|S )z
        Stem an Hungarian word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        r   r;   Nr(  r   r  r   r  r  r5   rR  rS  rT  )rU  rV  )rW  rX  rZ  )r]  r[  rd  re  r  )r^  r\  rf  rg  r  )rn  ro  rw  rl  rj  rp  )rq  rr  rY  rm  rk  rs  rx  ry  )r-   r   _HungarianStemmer__r1_hungarian_HungarianStemmer__vowels_HungarianStemmer__digraphsr.   !_HungarianStemmer__step1_suffixes$_HungarianStemmer__double_consonantsr@   r.  !_HungarianStemmer__step2_suffixesr   !_HungarianStemmer__step3_suffixes!_HungarianStemmer__step4_suffixes!_HungarianStemmer__step5_suffixes!_HungarianStemmer__step6_suffixes!_HungarianStemmer__step7_suffixes!_HungarianStemmer__step8_suffixes!_HungarianStemmer__step9_suffixes)r   r4   rB   r  r   r!   r!   r"   r     s    





























zHungarianStemmer.stemc                 C   s   d}|d |kr|D ]6}||dd kr|| |d d d }|  S qtdt|D ]$}|| |krZ||d d } qqZn4tdt|D ]$}|| |kr||d d } qq|S )a?  
        Return the region R1 that is used by the Hungarian stemmer.

        If the word begins with a vowel, R1 is defined as the region
        after the first consonant or digraph (= two letters stand for
        one phoneme) in the word. If the word begins with a consonant,
        it is defined as the region after the first vowel in the word.
        If the word does not contain both a vowel and consonant, R1
        is the null region at the end of the word.

        :param word: The Hungarian word whose region R1 is determined.
        :type word: str or unicode
        :param vowels: The Hungarian vowels that are used to determine
                       the region R1.
        :type vowels: unicode
        :param digraphs: The digraphs that are used to determine the
                         region R1.
        :type digraphs: tuple
        :return: the region R1 for the respective word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method of the subclass
               HungarianStemmer. It is not to be invoked directly!

        r;   r   r<   Nr   )indexr?   r@   )r   r4   rA   ZdigraphsrB   ZdigraphrC   r!   r!   r"   Z__r1_hungarianL  s    
zHungarianStemmer.__r1_hungarianN)r'   r(   r)   r*   r|  r}  r  r~  r  r  r  r  r  r  r  r  r   r{  r!   r!   r!   r"   rE  
  s    !.!, rE  c                   @   s(   e Zd ZdZdZdZdZdZdd ZdS )	ItalianStemmeraj  
    The Italian Snowball stemmer.

    :cvar __vowels: The Italian vowels.
    :type __vowels: unicode
    :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm.
    :type __step0_suffixes: tuple
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :note: A detailed description of the Italian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/italian/stemmer.html

    u   aeiouàèìòù)%ZglielaZglieleZglieliZglieloZglieneZseneZmelaZmeleZmeliZmeloZmeneZtelaZteleZteliZteloZteneZcelaZceleZceliZceloZceneZvelaZveleZveliZveloZveneZglicilalerS  lominer  tivi)3r  Zatriciazioneazioniuzioneuzioniusioneusioniamentoamentiimentoimentiamenteabileabiliibileibilimenteatoreatorilogiar  anzaZanzeZicheZichiismoZismiistar  istiu   istàu   istèu   istìr  antienzaenzeicoiciicaiceosoosiosaZose   itàivoiviivarg  )WZerebberoZirebberoZasseroZassimoZerannoZerebbeZeremmoZeresteZerestiZesseroZirannoZirebbeZiremmoZiresteZirestiZiscanoZisconoZisseroZaronoZavamoZavanoZavateZeremoZereteZeronoZevamoZevanoZevateZiremoZireteZironoZivamoZivanoZivateZammoandor  ZassiZemmoZendar   Zendiendor  ereiZYamoZiamoZimmor  ireiZiscaZisceZisciZiscoZanoareatard  atiZatoavaZaviZavou   eràr   u   eròeteZevaZeviZevou   iràireu   iròitaitere  Zitor  r  r  ZonoutauteutiZutoarr  c                 C   s@  |  }|| jkr|S d}|dddddddd	d
d}tdt|D ]D}||d  dkrT|| dkrTd|d| d||d d f}qTtdt|d D ]}||d  | jkr||d  | jkr|| dkrd|d| d||d d f}q|| dkrd|d| d||d d f}q| || j\}}| || j}| j	D ]}|
|r`|t| d t|  dkr|dt|  }|dt|  }|dt|  }|dt|  }nR|t| d t|  dkr.t||d}t||d}t||d}t||d} q8q`| jD ]<}|
|r>|dkr2|
|r2d}|dd }|dd }|dd }|
dr|dd }|dd }|dd }|
dr.|dd }|dd }nJ|
dr
|dd }|dd }n$|
d rt|dd! }|dd! }nB|d"krh|
|rhd}|dd }|dd }n|
|rtd}|d#kr|dt|  }|dt|  }|dt|  }|
d$rt|dd }|dd }n|d%kr|dd }|dd }nn|d&kr,|dd' }|dd' }nH|d(krRt||d)}t||d)}n"|d*kr|dd+ }|dd+ }|dd+ }|
d,r|dd }|dd }n$|
d rt|dd! }|dd! }n|d-krP|dd+ }|dd+ }|dd+ }|
drt|dd }|dd }|dd }|
d$rt|dd }|dd }n$|dt|  }|dt|  } q~q>|s| jD ]<}|
|r|dt|  }|dt|  } qȐq|
d.r|dd/ }|dd/ }|
dr|dd/ }|dd/ }|
d0r(|dd/ }|dddd}|S )1z
        Stem an Italian word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        Fr     àr  r)  r     ìr     òr      ùr<   r  r  r;   Nr  rC   r#  r   )r  r  rH   )r  r   r  r5   r  Tr  r  r   r  )osrk  r&  r(  )r  r  r  r  )r  r  r  r  rk  )r  r  )r  r  r  r  r&  )r  r  ter  r   rk  r  )r  r  r  rg  )r  r5   rC   r  r  r)  r  r  r   )chgh)r-   r   r-  r?   r@   r.  _ItalianStemmer__vowelsrG   rK   _ItalianStemmer__step0_suffixesr.   r   _ItalianStemmer__step1_suffixes_ItalianStemmer__step2_suffixes)r   r4   r1  rC   rB   rF   rJ   r   r!   r!   r"   r   C  s    

    
&$&&
""









zItalianStemmer.stemN)	r'   r(   r)   r*   r  r  r  r  r   r!   r!   r!   r"   r  y  s   '5Zr  c                   @   s,   e Zd ZdZdZdZdZdZdZdd Z	d	S )
NorwegianStemmera  
    The Norwegian Snowball stemmer.

    :cvar __vowels: The Norwegian vowels.
    :type __vowels: unicode
    :cvar __s_ending: Letters that may directly appear before a word final 's'.
    :type __s_ending: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the Norwegian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/norwegian/stemmer.html

    r   Zbcdfghjlmnoprtvyz)ZhetenesZhetenehetensheterhetenr   ander   Zedesr   erteZedeZaner   r   r   r   hetastertr   r  r   r  r   r   r  r5   r   )r   vt)ZhetslovZelegr  ZelovZslovlegZeigr  r  Zlovr  c                 C   s8  |  }|| jkr|S | || j}| jD ]}||r*|dkrZt||d}t||d}nn|dkr|d | jks|d dkr|d | jkr|dd }|dd }n$|dt|  }|dt|  } qq*| j	D ],}||r|dd }|dd } qq| j
D ]*}||r|dt|  } q4q|S )	z
        Stem a Norwegian word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        )r  r  r   r   r   r  r   Nr   )r-   r   rD   _NorwegianStemmer__vowels!_NorwegianStemmer__step1_suffixesr.   r   _NorwegianStemmer__s_endingr@   !_NorwegianStemmer__step2_suffixes!_NorwegianStemmer__step3_suffixesr   r4   rB   r   r!   r!   r"   r   8  s<    








zNorwegianStemmer.stemN)
r'   r(   r)   r*   r  r  r  r  r  r   r!   r!   r!   r"   r    s    r  c                   @   s(   e Zd ZdZdZdZdZdZdd ZdS )	PortugueseStemmerav  
    The Portuguese Snowball stemmer.

    :cvar __vowels: The Portuguese vowels.
    :type __vowels: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm.
    :type __step4_suffixes: tuple
    :note: A detailed description of the Portuguese
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/portuguese/stemmer.html

    u   aeiouáéíóúâêô)/ZamentosZimentos   uço~esr  r  adorasadoresu   aço~eslogias   ênciasr  idadesu   ançasismosistasadorau   aça~or  u   ânciar     uça~o   ênciar  idadeu   ançaZezasicosicasr  u   ávelu   ívelr  osososasadorr  ivasivosr  Zezar  r  r  r  r  r  r  )x   aríamos   eríamos   iríamosu   ássemosu   êssemosu   íssemosu   aríeisu   eríeisu   iríeisu   ásseisu   ésseisu   ísseis   áramosu   éramosu   íramosu   ávamosaremoseremosiremosZariamZeriamZiriamZassemZessemZissemzara~ozera~ozira~oZariasZeriasZiriasZardesZerdesZirdesr  Zessesr  ZastesZestesr  u   áreisZareisu   éreisZereisu   íreisZireisu   áveisu   íamosZarmosZermosZirmosZariaZeriaZiriar  Zesser  asteZester  Zareir  r  ZaramZeramZiramZavamZaremZeremZiremr  r  Zindoadasidas   arásaras   erásr     irásZavasZaresr   Ziresu   íeisadosidosu   ámosamosemosimosr  adaida   aráara   erár     irár  iamadoidoZiasr  Zeisr  iarv  ra  r7  r  r   r  r  r   r  euiuZou)r  r  rC   r  r  r  r  c                 C   s  |  }|| jkr|S d}d}|dddddddd	}| || j\}}| || j}| jD ]}||rh|d
kr4||r4d}|dd }|dd }|dd }|dr|dd }|dd }|dd }|dr0|dd }|dd }n$|drz|dd }|dd }nF|dkr||r|t| d t|  dkrd}t	||d}t	||d}n||rzd}|dkrt	||d}t	||d}n|dkrt	||d}t	||d}n|dkrt	||d}t	||d}nl|dkrd|dd }|dd }|dd }|drz|dd }|dd }n|d kr|dt|  }|dt|  }|dt|  }|d!r|dd }|dd }n$|d"rz|dd }|dd }n|d#krV|dt|  }|dt|  }|dt|  }|drz|dd }|dd }n$|dt|  }|dt|  } qqh|s| j
D ]@}||rd}|dt|  }|dt|  } qАq|s|r|d$r|d d%kr|dd& }|dd& }|s^|s^| jD ]<}||r |dt|  }|dt|  } q^q |d'r|dd& }|dd& }|d	r|ds|d(r|d$r|dd& }n|d)rt	|d)d%}|dddd}|S )*z
        Stem a Portuguese word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        F   ãza~   õzo~u   qüZquu   gür(  r  TNr  r  r   r  r  rk  rb  )r  r  r<   r5   r  )r  r  r%  )r  r  r  )r  r  enter  r&  )r  ZavelZivelr(  )r  r  r  r&  )r  r  r  r  rC   r'  r   )r5   r     êr  r'  )r-   r   r-  rG   _PortugueseStemmer__vowelsrK   "_PortugueseStemmer__step1_suffixesr.   r@   r   "_PortugueseStemmer__step2_suffixes"_PortugueseStemmer__step4_suffixes)r   r4   r1  r2  rB   rF   rJ   r   r!   r!   r"   r   ,  s    

   










zPortugueseStemmer.stemN)	r'   r(   r)   r*   r  r  r  r   r   r!   r!   r!   r"   r  k  s   1zr  c                   @   s,   e Zd ZdZdZdZdZdZdZdd Z	d	S )
RomanianStemmera  
    The Romanian Snowball stemmer.

    :cvar __vowels: The Romanian vowels.
    :type __vowels: unicode
    :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm.
    :type __step0_suffixes: tuple
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the Romanian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/romanian/stemmer.html

    u   aeiouăâî)iiloruluieloriileiloratei   aţie   aţiaauaeleiuaieiilerP  ear  ).	abilitate	abilitati   abilităţi	ibilitate	   abilităiivitateivitati	   ivităţiicitateicitati	   icităţiicatori   ivităi   icităiicator   aţiuneatoare   ătoare   iţiuneitoareicivaiciveicivi   icivăicalaicaleicali   icalăativarW  ativi   ativăr     ătoriitivaitiveitivi   itivăitoriicivrX  ativrO     ătoritivitor)>Zabilar  r  u   abilăZibilar  r  u   ibilăr  ZitateZitatiu   ităţir&  ZibilZoasau   oasăZoaseZantar  r  u   antărO  u   ităiiuneiunir  r  r  r     istă   iştir  u   atăr  rd  r  u   utăr  r  r  u   ităre  r  r  r  r  u   icăr  u   oşira  r  rg  r  u   ivărc  istr  utr  rk  r  r  )^
   seserăţiu	   aserăţiu	   iserăţiu
   âserăţiu	   userăţi   seserămu   aserămu   iserămu   âserămu   userăm   serăţi   seseşi   seserău   eascău   arăţiu   urăţiu   irăţiu   ârăţiu   aseşiu   aserău   iseşiu   iserău   âseşiu   âserău   useşiu   useră   serămsesemZinduu   ânduu   eazău   eştiu   eşteu   ăştiu   ăşteu   eaţiu   iaţiu   arămu   urămu   irămu   ârămZasemZisemu   âsemZusem   seşi   serăseser  r   r  u   âreindu   ândZezeZeziescu   ăscZeamZeaiZeaur  ZiaiZiauu   aşiu   arău   uşiu   urău   işiu   irău   âşiu   ârăaseZiseu   âseZuse   aţi   eţi   iţi   âţiseir  ra  r  aur/  r  Zuiu   âi   ămr7  rt     âmr  c           
      C   sz  |  }|| jkr|S d}d}tdt|d D ]}||d  | jkr0||d  | jkr0|| dkrd|d| d||d d f}q0|| dkr0d|d| d||d d f}q0| || j\}}| || j}| jD ]}|	|r||kr|d	kr<|dt|  }||kr6|dt|  }nd}n|d
ksl|dksl|dkrz|dd dkrz|dd }n~|dkrt
||d}||krt
||d}nd}nJ|dkrt
||d}||krt
||d}nd}n|dkr|dd } q qd}	| jD ]}|	|r
||krd}d}	|dkrDt
||d}n|dkr\|dd }n|dkrtt
||d}nr|dkrt
||d}nZ|dkrt
||d}||krt
||d}n,|d krt
||d!}||krt
||d!}nd} qq
|	s qq | jD ]}|	|r
||krd}|d"krX|d d#krd|dd d$f}n*|d%krpt
||d&}n|dt|  } qq
|s(|s(| jD ]}|	|r||kr|d'kr|dt|  }|dt|  }n6||s|||d  d(kr|dt|  } q(qd)D ]4}|	|r,||krX|dt|  } qbq,|dddd}|S )*z
        Stem a Romanian word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        Fr<   r  r;   Nr  rC   r#  )rP  r#  r*  r'  r.  r&  r   abr   )r/  r+  r$  r5   )r  r,  r-  r%  r"  r&  )r(  r)  r   T)r0  r1  r4  r2  r&  r3  )r5  r6  r<  r7  r  )r8  r9  r=  r:  r>  r;  rU  rD  rE  rF  rG  rX  rH  rI  rJ  rK  rk  )rV  rL  rW  rM  rN  r?  r@  rO  r  rA  rW  rO  r  )	rX  rP  rQ  rR  rS  rB  rC  rY  rT  r  )rZ  r[  u   ţr  )rc  r  r^  r  r  r  r\  r]  r^  )r`  ra  rb  rc  rd  re  rf  rg  rh  ri  rm  rn  ro  rp  rq  rs  r7  rt  rt  r  u
   aeioăâî)r  r  r5   rC   u   ă)r-   r   r?   r@   _RomanianStemmer__vowelsr.  rG   rK    _RomanianStemmer__step0_suffixesr.   r    _RomanianStemmer__step1_suffixes _RomanianStemmer__step2_suffixes _RomanianStemmer__step3_suffixesr   r  r-  )
r   r4   r1  r2  rC   rB   rF   rJ   r   Zreplacement_doner!   r!   r"   r     s    

$&&































zRomanianStemmer.stemN)
r'   r(   r)   r*   rv  rw  rx  ry  rz  r   r!   r!   r!   r"   r!    s   0@ar!  c                   @   sL   e Zd ZdZdZdZdZdZdZdZ	dZ
d	d
 Zdd Zdd Zdd ZdS )RussianStemmeram  
    The Russian Snowball stemmer.

    :cvar __perfective_gerund_suffixes: Suffixes to be deleted.
    :type __perfective_gerund_suffixes: tuple
    :cvar __adjectival_suffixes: Suffixes to be deleted.
    :type __adjectival_suffixes: tuple
    :cvar __reflexive_suffixes: Suffixes to be deleted.
    :type __reflexive_suffixes: tuple
    :cvar __verb_suffixes: Suffixes to be deleted.
    :type __verb_suffixes: tuple
    :cvar __noun_suffixes: Suffixes to be deleted.
    :type __noun_suffixes: tuple
    :cvar __superlative_suffixes: Suffixes to be deleted.
    :type __superlative_suffixes: tuple
    :cvar __derivational_suffixes: Suffixes to be deleted.
    :type __derivational_suffixes: tuple
    :note: A detailed description of the Russian
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/russian/stemmer.html

    )	zivshis'zyvshis'vshis'ZivshiZyvshivshir  Zyvv)zui^ushchi^ui^uzui^ushchi^ai^azui^ushchimizui^ushchymizui^ushchegozui^ushchogozui^ushchemuzui^ushchomuzui^ushchikhzui^ushchykhzui^ushchui^uzui^ushchaiazui^ushchoi^uzui^ushchei^ui^ushchi^ui^ui^ushchi^ai^az
ui^ushcheez
ui^ushchiez
ui^ushchyez
ui^ushchoezui^ushchei`zui^ushchii`zui^ushchyi`zui^ushchoi`z
ui^ushchemz
ui^ushchimz
ui^ushchymz
ui^ushchom
i^ushchimi
i^ushchymi
i^ushchego
i^ushchogo
i^ushchemu
i^ushchomu
i^ushchikh
i^ushchykhi^ushchui^ui^ushchai^ai^ushchoi^ui^ushchei^u	i^ushchee	i^ushchie	i^ushchye	i^ushchoe
i^ushchei`
i^ushchii`
i^ushchyi`
i^ushchoi`	i^ushchem	i^ushchim	i^ushchym	i^ushchom
shchi^ui^u
shchi^ai^az
ivshi^ui^uz
ivshi^ai^az
yvshi^ui^uz
yvshi^ai^ashchimishchymishchegoshchogoshchemushchomushchikhshchykhshchui^ushchai^ashchoi^ushchei^uZivshimiZivshymiZivshegoZivshogoZivshemuZivshomuZivshikhZivshykhzivshui^uzivshai^azivshoi^uzivshei^uZyvshimiZyvshymiZyvshegoZyvshogoZyvshemuZyvshomuZyvshikhZyvshykhzyvshui^uzyvshai^azyvshoi^uzyvshei^u	vshi^ui^u	vshi^ai^ashcheeshchieshchyeshchoeshchei`shchii`shchyi`shchoi`shchemshchimshchymshchomZivsheeZivshieZivshyeZivshoezivshei`zivshii`zivshyi`zivshoi`ZivshemZivshimZivshymZivshomZyvsheeZyvshieZyvshyeZyvshoezyvshei`zyvshii`zyvshyi`zyvshoi`ZyvshemZyvshimZyvshymZyvshomvshimivshymivshegovshogovshemuvshomuvshikhvshykhvshui^uvshai^avshoi^uvshei^uemi^ui^uemi^ai^anni^ui^unni^ai^avsheevshievshyevshoevshei`vshii`vshyi`vshoi`vshemvshimvshymvshomemimiemymiemegoemogoememuemomuemikhemykhemui^uemai^aemoi^uemei^unniminnyminnegonnogonnemunnomunnikhnnykhnnui^unnai^annoi^unnei^uemeeemieemyeemoeemei`emii`emyi`emoi`emememimemymemomnneenniennyennoennei`nnii`nnyi`nnoi`nnemnnimnnymnnomzi^ui^uzi^ai^aZimiZymiZegoZogoZemuZomuZikhZykhui^uzai^azoi^uzei^ur*  r  yeZoeei`ii`zyi`oi`r7  rt  ymr`  )zsi^azs').esh'zei`tezui`tezui^utzish'r  i`tei^utnnoZilaZylaZenar  ZiliZyliZiloZyloZenozi^atZuetZenyzit'zyt'r  r  r  rS  r7  r  nor   rH  t'r  zui`ilZylrt  r  r   r  Zyti^ui`rl  r  )$zii^amizii^akhzi^amizii^amzi^akhZamiziei`zi^amZiemZakhzii^uz'i^uzii^az'i^aZevovr  z'erv  r  r  r  r  r7  ra  r`  r  i^ar  r5   rC   r  r  r  r!  r4  )zei`shezei`sh)zost'Zostc                 C   s  || j kr|S d}tt|D ]}t|| dkrd} q<q|sD|S | |}d}d}d}d}d}| |\}	}
| jD ]}|	|rv|dkr|	t| d t|  dks|	t| d t|  dkrR|d	t|  }|
d	t|  }
|	d	t|  }	d} qTqv|d	t|  }|
d	t|  }
|	d	t|  }	d} qTqv|s| jD ]N}|	|r`|d	t|  }|
d	t|  }
|	d	t|  }	 qq`| j	D ]}|	|r|d
krV|	t| d t|  dks|	t| d t|  dkr|d	t|  }|
d	t|  }
|	d	t|  }	d} qn@|d	t|  }|
d	t|  }
|	d	t|  }	d} qq|s| j
D ]}|	|r|dkrF|	t| d t|  dks|	t| d t|  dkr|d	t|  }|
d	t|  }
|	d	t|  }	d} qn@|d	t|  }|
d	t|  }
|	d	t|  }	d} qq|s|s| jD ]N}|	|r|d	t|  }|
d	t|  }
|	d	t|  }	 qq|	dr|d	d }|
d	d }
| jD ]*}|
|r|d	t|  } qBq|dr^|d	d }d}|s| jD ].}||rj|d	t|  }d} qqj|dr|d	d }|s|s|dr|d	d }| |}|S )z
        Stem a Russian word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        F   T)r~  r}  r|  r=   r  r<   r  N)r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   )r  r  r  r  rS  r  rl  r7  r  r  r  r   r	  rH  r  r  r
  rC   r   r   r4  )r   r?   r@   ord"_RussianStemmer__cyrillic_to_roman _RussianStemmer__regions_russian+_RussianStemmer__perfective_gerund_suffixesr.   #_RussianStemmer__reflexive_suffixes$_RussianStemmer__adjectival_suffixes_RussianStemmer__verb_suffixes_RussianStemmer__noun_suffixes&_RussianStemmer__derivational_suffixes%_RussianStemmer__superlative_suffixes"_RussianStemmer__roman_to_cyrillic)r   r4   Zchr_exceededrC   r1  Zadjectival_removedZverb_removedZundouble_successZsuperlative_removedrJ   rF   r   r!   r!   r"   r     s    









 










zRussianStemmer.stemc                 C   s&  d}d}d}d}| dd dd dd}td	t|D ]4}|| |kr:||d	  |kr:||d	 d
 } qpq:td	t|D ]4}|| |kr~||d	  |kr~||d	 d
 } qq~tt|D ]$}|| |kr||d	 d
 } qq| dd dd dd}| dd dd dd}||fS )a=  
        Return the regions RV and R2 which are used by the Russian stemmer.

        In any word, RV is the region after the first vowel,
        or the end of the word if it contains no vowel.

        R2 is the region after the first non-vowel following
        a vowel in R1, or the end of the word if there is no such non-vowel.

        R1 is the region after the first non-vowel following a vowel,
        or the end of the word if there is no such non-vowel.

        :param word: The Russian word whose regions RV and R2 are determined.
        :type word: str or unicode
        :return: the regions RV and R2 for the respective Russian word.
        :rtype: tuple
        :note: This helper method is invoked by the stem method of the subclass
               RussianStemmer. It is not to be invoked directly!

        r;   )	Ar  Er  r5   rC   r  r  r!  r  r  r  r  e`r  r<   N)r-  r?   r@   )r   r4   rB   rF   rJ   rA   rC   r!   r!   r"   Z__regions_russian3  s(    z RussianStemmer.__regions_russianc                 C   s  | dd dd dd dd dd d	d d
d dd dd dd dd dd dd dd dd dd dd dd dd dd dd d d d!d" d#d" d$d% d&d% d'd( d)d( d*d+ d,d+ d-d. d/d. d0d1 d2d1 d3d4 d5d4 d6d7 d8d7 d9d: d;d: d<d= d>d= d?d@ dAd@ dBdC dDdC dEdF dGdF dHdI dJdI dKdL dMdL dNdO dPdO dQdR dSdR dTdU dVdU dWdX dYdX dZd[ d\d[ d]d^ d_d^ d`da dbda}|S )ca#  
        Transliterate a Russian word into the Roman alphabet.

        A Russian word whose letters consist of the Cyrillic
        alphabet are transliterated into the Roman alphabet
        in order to ease the forthcoming stemming process.

        :param word: The word that is transliterated.
        :type word: unicode
        :return: the transliterated word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method of the subclass
               RussianStemmer. It is not to be invoked directly!

        u   Аr     аu   Бb   бu   Вr~     вu   Гg   гu   Дri     дu   Еr5      еu   Ёu   ёu   Жzh   жu   Зz   зu   ИrC      иu   Йr     йu   Кr     кu   Лrl     лu   Мrh     мu   Нr     нu   Оr     оu   Пp   пu   Рr   рu   Сr      сu   Тr     тu   Уr     уu   Фf   фu   Хkh   хu   Цt^s   цu   Чr     чu   Шsh   шu   Щshch   щu   Ъ''   ъu   Ыr!     ыu   Ьr4     ьu   Эr      эu   Юr     юu   Яr     яr-  r   r4   r!   r!   r"   Z__cyrillic_to_romanc  s           	 
                        ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A Ez"RussianStemmer.__cyrillic_to_romanc                 C   s  | dd dd dd dd d	d
 dd dd dd dd dd dd dd dd dd dd dd  d!d" dd d#d$ d%d& d'd( d)d* d+d, d-d. d/d0 d1d2 d3d4 d5d6 d7d8 d9d: d;d< d=d> d?d@}|S )AaH  
        Transliterate a Russian word back into the Cyrillic alphabet.

        A Russian word formerly transliterated into the Roman alphabet
        in order to ease the stemming process, is transliterated back
        into the Cyrillic alphabet, its original form.

        :param word: The word that is transliterated.
        :type word: str or unicode
        :return: word, the transliterated word.
        :rtype: unicode
        :note: This helper method is invoked by the stem method of the subclass
               RussianStemmer. It is not to be invoked directly!

        r  rK  r  rL  rD  rE  r=  r>  r?  r@  r  rA  r   rJ  r  r.  rB  rC  r  r/  r5   r(  r)  r*  r  r!  r"  r#  r~  r$  r%  r&  ri  r'  r+  r,  rC   r-  rl  r0  rh  r1  r  r2  r  r3  r4  r5  r6  r7  r   r8  r  r9  r  r:  r;  r<  rF  rG  r!  rH  r4  rI  rM  rN  r!   r!   r"   Z__roman_to_cyrillic  s            	 
                        $z"RussianStemmer.__roman_to_cyrillicN)r'   r(   r)   r*   r  r  r  r  r  r  r  r   r  r  r  r!   r!   r!   r"   r{    s    m0&  #0Wr{  c                   @   s8   e Zd ZdZdZdZdZdZdZdZ	dd	 Z
d
d ZdS )SpanishStemmeraR  
    The Spanish Snowball stemmer.

    :cvar __vowels: The Spanish vowels.
    :type __vowels: unicode
    :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm.
    :type __step0_suffixes: tuple
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2a_suffixes: Suffixes to be deleted in step 2a of the algorithm.
    :type __step2a_suffixes: tuple
    :cvar __step2b_suffixes: Suffixes to be deleted in step 2b of the algorithm.
    :type __step2b_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the Spanish
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/spanish/stemmer.html

    u   aeiouáéíóúü)ZselasZselosZselaZseloZlasZlesZlosZnosmer  r  r  r  )/ZamientosZimientosZamientoZimientoacionacionesucionesr  r  ancias   logíasenciasr  r  Zanzasr  r  Ziblesr  r     aciónr  ancia   logía   uciónenciar  r  r  r  r  r^  r_  r  r  r  r  r  idadr  r  r  r  r  r  r  r  )ZyeronyendoZyamosZyaisZyanyenZyasyesZyar  Zyou   yó)`r  r  r  u   iéramosu   iésemosu   aríaisr  u   eríaisr  u   iríaisr  ZieraisZieseisZasteisZisteisu   ábamosr  u   ásemosu   aríanu   aríasu   aréisu   eríanu   eríasu   eréisu   iríanu   iríasu   iréisZieranZiesenZieroniendoZierasZiesesZabaisZaraisZaseisu   éamosu   aránr  u   aríau   eránr  u   eríau   iránr  u   iríaZieraZieser   r  ZabanZaranZasenZaronr  Zabasr  r  r  Zasesu   íaisr  r  r	  r  r
  r  u   arér  u   erér  u   iréZabar  r  r  rl  u   íanr  r  u   íasu   áis   éisu   íarb  r;  ru  r  u   iór  r   r  r  u   ísr   r   )r  r  r5   r  r  r  r  r  c                 C   s  |  }|| jkr|S d}| || j\}}| || j}| jD ]}||r@||sZq@|dt|  ds|dt|  dr|dt|  dr| |dt|  }| |dt|  }| |dt|  }| |dt|  } qq@| j	D ]2}||s*q|dkr||rd}|dd }|dd }|dd }|d	r|dd
 }|dd
 }|dd
 }|dr|dd
 }|dd
 }n$|dr@|dd
 }|dd
 }nZ||r@d}|dkr^|dt|  }|dt|  }|dt|  }|dr@|dd
 }|dd
 }n|dkrt
||d}t
||d}n|dkrt
||d}t
||d}n|dkrt
||d}t
||d}np|dkr8|dt|  }|dt|  }|dt|  }|dr@|dd }|dd }n|dkr|dt|  }|dt|  }|dt|  }dD ]6}||r||dt|  }|dt|  }q|n|dkr|dt|  }|dt|  }|dt|  }|dr@|dd
 }|dd
 }n$|dt|  }|dt|  } qJq|s4| jD ]^}||rV|t| d t|  dkrV|dt|  }|dt|  } qqV| jD ]v}||r|dt|  }|dt|  }|dkr*|dr|dd }|dr*|dd } q4q| jD ]p}||r:|dt|  }|dkr|dt|  }|d
d dkr|dr|dd } qq:| |}|S ) z
        Stem a Spanish word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        FN)
r  u   ándor  u   árr   u   érr`  u   iéndor  u   írr]  Zuyendor  Tr  r  r   r  r  )r  r  rW  r  r  rQ  rR  r  r  rX  rT  rk  )rY  rU  r%  )rZ  rS  r  )r[  rV  r  r  )r  r^  r_  r(  )r\  r  )r&  rk  r  )r  r  r  r  r<   )r   r   ra  r
  r(  r   )r5   r  )r-   r   rG   _SpanishStemmer__vowelsrK   _SpanishStemmer__step0_suffixesr.   r@   !_SpanishStemmer__replace_accented_SpanishStemmer__step1_suffixesr    _SpanishStemmer__step2a_suffixes _SpanishStemmer__step2b_suffixes_SpanishStemmer__step3_suffixes)r   r4   r1  rB   rF   rJ   r   Zpre_suffr!   r!   r"   r     s    











.







zSpanishStemmer.stemc                 C   s,   | dd dd dd dd d	d
S )u}  
        Replaces all accented letters on a word with their non-accented
        counterparts.

        :param word: A spanish word, with or without accents
        :type word: str or unicode
        :return: a word with the accented letters (á, é, í, ó, ú) replaced with
                 their non-accented counterparts (a, e, i, o, u)
        :rtype: str or unicode
        r  r  r  r5   r  rC   r  r  r   r  rM  rN  r!   r!   r"   Z__replace_accentedl  s        z!SpanishStemmer.__replace_accentedN)r'   r(   r)   r*   rb  rc  re  rf  rg  rh  r   rd  r!   r!   r!   r"   rO    s   1b 2rO  c                   @   s,   e Zd ZdZdZdZdZdZdZdd Z	d	S )
SwedishStemmera  
    The Swedish Snowball stemmer.

    :cvar __vowels: The Swedish vowels.
    :type __vowels: unicode
    :cvar __s_ending: Letters that may directly appear before a word final 's'.
    :type __s_ending: unicode
    :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm.
    :type __step1_suffixes: tuple
    :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm.
    :type __step2_suffixes: tuple
    :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm.
    :type __step3_suffixes: tuple
    :note: A detailed description of the Swedish
           stemming algorithm can be found under
           http://snowball.tartarus.org/algorithms/swedish/stemmer.html

    u   aeiouyäåöZbcdfghjklmnoprtvy)%Zheternar  r  r  ZandenZarnasZernasZornasru  ZandetZarensZarnaZernaZornar  Zarner   ZarenZadesZernsZader  r6  r   r  r  rb  r   r  r   orr  r   r  r  r5   r   )r   r   r   r   r   r   r   )fullt   löstr  r  r  c                 C   s  |  }|| jkr|S | || j}| jD ]f}||r*|dkrh|d | jkr|dd }|dd }n$|dt|  }|dt|  } qq*| jD ]*}||r|dd }|dd } qq| j	D ]F}||r|dkr|dt|  }n|dkr
|dd } qq|S )z
        Stem a Swedish word and return the stemmed form.

        :param word: The word that is stemmed.
        :type word: str or unicode
        :return: The stemmed form.
        :rtype: unicode

        r   r   Nr   )r  r  r  )rk  rl  )
r-   r   rD   _SwedishStemmer__vowels_SwedishStemmer__step1_suffixesr.   _SwedishStemmer__s_endingr@   _SwedishStemmer__step2_suffixes_SwedishStemmer__step3_suffixesr  r!   r!   r"   r     s4    








zSwedishStemmer.stemN)
r'   r(   r)   r*   rm  ro  rn  rp  rq  r   r!   r!   r!   r"   ri    s   'ri  c                     sT  ddl m}  ddddddd	d
ddddddddd}td td td td tddtj d d }|dkrzqP|tjkrtd qRt| | || dd }d fdd|D }t	d d!|d 
 }d|}t	d d!|d 
 }td td" td#d$ t| td% td&d$ t| td" td qRdS )'a<  
    This function provides a demonstration of the Snowball stemmers.

    After invoking this function and specifying a language,
    it stems an excerpt of the Universal Declaration of Human Rights
    (which is a part of the NLTK corpus collection) and then prints
    out the original and the stemmed text.

    r   )udhrzArabic_Alarabia-ArabiczDanish_Dansk-Latin1zDutch_Nederlands-Latin1zEnglish-Latin1zFinnish_Suomi-Latin1zFrench_Francais-Latin1zGerman_Deutsch-Latin1zHungarian_Magyar-UTF8zItalian_Italiano-Latin1zNorwegian-Latin1zPortuguese_Portugues-Latin1zRomanian_Romana-Latin2zRussian-UTF8zSpanish-Latin1zSwedish_Svenska-Latin1r   
z******************************zDemo for the Snowball stemmersz9Please enter the name of the language to be demonstrated
/z"(enter 'exit' in order to leave): exitz@
Oops, there is no stemmer for this language. Please try again.
Ni,   c                 3   s   | ]}  |V  qd S r$   )r   ).0r4   r   r!   r"   	<genexpr>-  s     zdemo.<locals>.<genexpr>z
(.{,70})\sz\1\nzF----------------------------------------------------------------------ZORIGINALF   z

zSTEMMED RESULTS)nltk.corpusrr  printinputr.  r   r   r0   r   rz   rstripcenter)rr  Zudhr_corpusr   ZexcerptZstemmedr!   rx  r"   demo  sj    


r  ) r*   r   r{  r   Z	nltk.stemr   Znltk.stem.apir   Znltk.stem.utilr   r   r   r+   r9   r:   rE   rL   r   r  r3  r  r  r5  rE  r  r  r  r!  r{  rO  ri  r  r!   r!   r!   r"   <module>   sz   
R(+[    ` # A      Y   q       zz  \   f      O   q