U
    e@                     @   s,   d Z ddlZddlmZ G dd deZdS )a[  
ARLSTem2 Arabic Light Stemmer
The details about the implementation of this algorithm are described in:
K. Abainia and H. Rebbani, Comparing the Effectiveness of the Improved ARLSTem
Algorithm with Existing Arabic Light Stemmers, International Conference on
Theoretical and Applicative Aspects of Computer Science (ICTAACS'19), Skikda,
Algeria, December 15-16, 2019.
ARLSTem2 is an Arabic light stemmer based on removing the affixes from
the words (i.e. prefixes, suffixes and infixes). It is an improvement
of the previous Arabic light stemmer (ARLSTem). The new version was compared to
the original algorithm and several existing Arabic light stemmers, where the
results showed that the new version considerably improves the under-stemming
errors that are common to light stemmers. Both ARLSTem and ARLSTem2 can be run
online and do not use any dictionary.
    N)StemmerIc                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"S )#ARLSTem2u  
    Return a stemmed Arabic word after removing affixes. This an improved
    version of the previous algorithm, which reduces under-stemming errors.
    Typically used in Arabic search engine, information retrieval and NLP.

        >>> from nltk.stem import arlstem2
        >>> stemmer = ARLSTem2()
        >>> word = stemmer.stem('يعمل')
        >>> print(word)
        عمل

    :param token: The input Arabic word (unicode) to be stemmed
    :type token: unicode
    :return: A unicode Arabic word
    c                 C   s   t d| _t d| _t d| _ddddg| _dd	d
g| _ddg| _dddg| _ddg| _	ddg| _
ddg| _ddg| _dddg| _ddg| _ddg| _ddg| _dd g| _d!d"d#d$g| _d%d&g| _d'd(d)d*g| _d+d,d-g| _d S ).Nz[\u0622\u0623\u0625]z[\u0649]z[\u064B-\u065F]u   الu   للu   فلu   فبu   بالu   كالu   والu   فللu   وللu   فبالu   وبالu   فكالu   كيu   كمu   هاu   همu   كماu   كنّu   هماu   هنّu   انu   ين   ونu   تانu   تينu   ستu   سيu   ساu   سنu   لنu   لتu   ليu   لأu   تماu   تنّ   ناu   تمu   تا   وا   ت   ا   ن)recompilere_hamzated_alifre_alifMaqsurare_diacriticspr2pr3pr32pr4su2su22su3su32pl_si2pl_si3verb_su2verb_pr2	verb_pr22	verb_pr33	verb_suf3	verb_suf2	verb_suf1)self r!   S/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/stem/arlstem2.py__init__2   s>    









zARLSTem2.__init__c              
   C   s   z|dkrt dd| _| |}| |}|dk	r8|}| |}|dk	rP|W S | |}|dk	rh|W S | |}| |}|dkr|dkr| |}|dk	rd| _|W S n|W S |W S  t k
r } zt	| W 5 d}~X Y nX dS )z:
        call this function to get the first stem
        NUThe word could not be stemmed, because                                  it is empty !FT)

ValueErrorZis_verbnormpreffem2masc	adjectivesuff	plur2singverbprint)r    tokenprefmZadjZpsr,   er!   r!   r"   stem1l   s8    






zARLSTem2.stem1c              
   C   s  z|d krt d| |}t|dkr|drX|d dkrX|dd |d  }|W S |dr|d d	kr|dd |d  }|W S t|d
kr|ds|dr|d d }|W S |dr|dd  W S |W S  t k
r } zt| W 5 d }~X Y nX d S )Nr$      r      ي      م   و   r   u   ل)r%   r2   len
startswithendswithr-   )r    r.   r1   r!   r!   r"   stem   s,    

zARLSTem2.stemc                 C   sP   | j d|}| jd|}| jd|}|drLt|dkrL|dd }|S )z
        normalize the word by removing diacritics, replace hamzated Alif
        with Alif bare, replace AlifMaqsura with Yaa and remove Waaw at the
        beginning.
         r   r5   r9   r:   r6   N)r   subr   r   r<   r;   r    r.   r!   r!   r"   r&      s    zARLSTem2.normc                 C   s   t |dkr2| jD ]}||r|dd   S qt |dkrd| jD ]}||rD|dd   S qDt |dkr| jD ]}||rv|dd   S qvt |dkr| jD ]}||r|dd   S qdS )z<
        remove prefixes from the words' beginning.
           r:   N   r3      )r;   r   r<   r   r   r   )r    r.   Zp3Zp4p2r!   r!   r"   r'      s     







zARLSTem2.prefc                 C   sD   t |dkr@|dr@|d dkr@|dr@|dd |d  S dS )z4
        remove the infixes from adjectives
        rB   r   r5   Nr4   r;   r<   r=   rA   r!   r!   r"   r)      s    
zARLSTem2.adjectivec                 C   s<  | dr"t|dkr"|dd S t|dkrT| jD ]}| |r4|dd   S q4t|dkr| jD ]}| |rf|dd   S qf| d	rt|dkr|dd }|S t|dkr| jD ]}| |r|dd   S qt|dkr| jD ]}| |r|dd   S q| d
r8t|dkr8|dd S |S )z=
        remove the suffixes from the word's ending.
        u   كr:   Nr7   r3   r4   rB   rF   u   هr   )r=   r;   r   r   r   r   )r    r.   s2Zs3r!   r!   r"   r*      s0    







zARLSTem2.suffc                 C   s  t |dkrt|dr@|d dkr@|dr@|dd |d  S |drt|d dkrt|drt|d	d |d  S |d
rt |dkr|d	d S t |dkr|d dkr|dr|d |dd  S |dr|d	d S |drt |dkr|d	d S d	S )zR
        transform the word from the feminine form to the masculine form.
        rC   r   r5   u   يةr6   rF   r   Nu   ايةrB   r4   r3   u   ةr   rD   r7   r:   rG   rA   r!   r!   r"   r(     s0    


zARLSTem2.fem2mascc                 C   s  t |dkr,|dr,|dr,|dd S t |dkr^| jD ]}||r>|dd   S q>t |dkr| jD ]}||rp|dd   S qpt |dkr|d	r|dd S |d
r|d d
kr|dd |dd  S |d
r|d d
kr|dd |d  S dS )zO
        transform the word from the plural form to the singular form.
        rB   r8   r   r6   r4   r3   NrF   u   اتr   rD   r:   r7   )r;   r<   r=   r   r   )r    r.   Zps2Zps3r!   r!   r"   r+   )  s$    




zARLSTem2.plur2singc                 C   s|   |  |}|dk	r|S | |}|dk	r,|S | |}|dk	rB|S | |}|dk	rX|S | |}|dk	rn|S | |}|S )z=
        stem the verb prefixes and suffixes or both
        N)verb_t1verb_t2verb_t3verb_t4verb_t5verb_t6)r    r.   Zvbr!   r!   r"   r,   D  s"    





zARLSTem2.verbc                 C   sZ  t |dkr<|dr<| jD ]}||r|dd   S qt |dkrx|drx| jD ]}||rX|dd   S qXt |dkr|drt |dkr|dr|dd S |dr|dd	 S |dr|dd	 S |d
r|dd	 S t |dkr$|dr$|d
r$|dd	 S t |dkrV|drV|d
rV|dd	 S dS )zJ
        stem the present tense co-occurred prefixes and suffixes
        rB   r   r6   r4   r5   r3   r   r   r7   r	   N)r;   r<   r   r=   r   r    r.   rH   r!   r!   r"   rJ   Z  s*    






&&zARLSTem2.verb_t1c                 C   s  t |dkr| jD ].}|| jd r||r|dd   S q|| jd rn|| jd rn|dd S || jd r|| jd r|dd S t |dkr|| jd r|dr|dd S t |dkr|| jd r|dr|dd S d	S )
zI
        stem the future tense co-occurred prefixes and suffixes
        rC   r   rD   r4   r6   rB   r	   r7   N)r;   r   r<   r   r=   rP   r!   r!   r"   rK   z  s,    
  

zARLSTem2.verb_t2c                 C   s   t |dkr2| jD ]}||r|dd   S qt |dkrd| jD ]}||rD|dd   S qDt |dkr| jD ]}||rv|dd   S qvdS )z1
        stem the present tense suffixes
        rB   NrF   r3   r4   r:   r7   )r;   r   r=   r   r   )r    r.   r   r   Zsu1r!   r!   r"   rL     s    





zARLSTem2.verb_t3c                 C   sL   t |dkrH| jD ]}||r|dd   S q|drH|dd S dS )z1
        stem the present tense prefixes
        r:   r6   Nr5   )r;   r   r<   )r    r.   Zpr1r!   r!   r"   rM     s    


zARLSTem2.verb_t4c                 C   s\   t |dkrX| jD ]}||r|dd   S q| jD ]}||r8|dd   S q8dS )z0
        stem the future tense prefixes
        r3   rD   N)r;   r   r<   r   )r    r.   r   r!   r!   r"   rN     s    



zARLSTem2.verb_t5c                 C   s6   t |dkr2| jD ]}||r|dd   S q|S )z4
        stem the imperative tense prefixes
        r3   rD   N)r;   r   r<   )r    r.   r   r!   r!   r"   rO     s
    

zARLSTem2.verb_t6N)__name__
__module____qualname____doc__r#   r2   r>   r&   r'   r)   r*   r(   r+   r,   rJ   rK   rL   rM   rN   rO   r!   r!   r!   r"   r   !   s"   :* ! r   )rT   r
   Znltk.stem.apir   r   r!   r!   r!   r"   <module>   s   