U
    e                     @   s`   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ G dd de j	Z
G dd	 d	e j	ZdS )
    N)closing)data)PorterStemmer)SnowballStemmerc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )SnowballTestc                 C   s\  t dd}|ddkst|ddks.t|ddks@t|dd	ksRt|d
d	ksdt|dd	ksvt|dd	kst|ddkst|ddkstt dd}|ddkst|dd	kst|ddkstt d}|ddkst|ddkst|ddks0t|dd	ksDt|ddksXtdS )z
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        arabicTu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منFu   اللذu   الكلماتu   كلمNr   stemAssertionError)selfZ
ar_stemmer r   Y/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/test/unit/test_stem.pytest_arabic
   s(    

zSnowballTest.test_arabicc                 C   s   t d}|ddkstd S )Nrussianu   авантненькаяu   авантненькr   )r   Zstemmer_russianr   r   r   test_russian'   s    zSnowballTest.test_russianc                 C   s`   t d}t ddd}|ddks&t|ddks8t|ddksJt|ddks\td S )NgermanT)Zignore_stopwordsu	   SchränkeZschrankZkeinenZkeinr   )r   Zstemmer_germanZstemmer_german2r   r   r   test_german+   s    zSnowballTest.test_germanc                 C   s0   t d}|ddkst|ddks,td S )NspanishZ	VisionadoZvisionZalgueZalgur   r   stemmerr   r   r   test_spanish5   s    zSnowballTest.test_spanishc                 C   s   t d}|ddkstd S )Nenglishzy'syr   r   r   r   r   test_short_strings_bug=   s    z#SnowballTest.test_short_strings_bugN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   	   s
   
r   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )
PorterTestc              
   C   s:   t tdjdd}|  W  5 Q R  S Q R X d S )Nz*stemmers/porter_test/porter_vocabulary.txtutf-8encoding)r   r   findopenread
splitlinesr   fpr   r   r   _vocabularyC   s    
zPorterTest._vocabularyc                 C   sL   t |d}t|  |D ].\}}||}||kstd||||qd S )N)modez*{} should stem to {} in {} mode but got {})r   zipr'   r	   r
   format)r   Zstemmer_modeZexpected_stemsr   wordZ	true_stemZour_stemr   r   r   _test_against_expected_outputK   s    

z(PorterTest._test_against_expected_outputc              	   C   s<   t tdjdd}| tj|   W 5 Q R X dS )az  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from
        https://tartarus.org/martin/PorterStemmer/voc.txt and
        https://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at https://tartarus.org/martin/PorterStemmer/
        z-stemmers/porter_test/porter_martin_output.txtr   r   N)	r   r   r!   r"   r,   r   ZMARTIN_EXTENSIONSr#   r$   r%   r   r   r   test_vocabulary_martin_modeX   s    	
 
z&PorterTest.test_vocabulary_martin_modec              	   C   s<   t tdjdd}| tj|   W 5 Q R X d S )Nz+stemmers/porter_test/porter_nltk_output.txtr   r   )	r   r   r!   r"   r,   r   ZNLTK_EXTENSIONSr#   r$   r%   r   r   r   test_vocabulary_nltk_modej   s    
 
z$PorterTest.test_vocabulary_nltk_modec              	   C   s`   t tdjdd}| tj|   W 5 Q R X | tjtdjdd   d S )Nz/stemmers/porter_test/porter_original_output.txtr   r   )	r   r   r!   r"   r,   r   ZORIGINAL_ALGORITHMr#   r$   r%   r   r   r   test_vocabulary_original_modet   s     
 

z(PorterTest.test_vocabulary_original_modec                 C   s   t  ddkstdS )zTest for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        ZoedoNr   r	   r
   )r   r   r   r   test_oed_bug   s    zPorterTest.test_oed_bugc                 C   sl   t  }|ddkst|ddks*t|jddddks@t|ddksRt|jddddkshtd	S )
zTest for improvement on https://github.com/nltk/nltk/issues/2507

        Ensures that stems are lowercased when `to_lowercase=True`
        ZOnonIiF)Zto_lowercaseZGithubZgithubNr1   )r   Zporterr   r   r   test_lowercase_option   s    z PorterTest.test_lowercase_optionN)
r   r   r   r'   r,   r-   r.   r/   r2   r6   r   r   r   r   r   B   s   
r   )Zunittest
contextlibr   Znltkr   Znltk.stem.porterr   Znltk.stem.snowballr   ZTestCaser   r   r   r   r   r   <module>   s   9