U
    e{                     @   s:   d Z ddlZddlmZ ddlmZ G dd dejZdS )z
Unit tests for nltk.tgrep.
    N)tgrep)ParentedTreec                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Z d<S )=TestSequenceFunctionsz5
    Class containing unit tests for nltk.tgrep.
    c                 C   sF   t d}| |dddddddd	d
dddddddddddddg dS )z.
        Simple test of tokenization.
        %A .. (B !< C . D) | ![<< (E , F) $ G]A..(B!<C.D)|[<<E,F$G]N)r   tgrep_tokenizeassertEqual)selftokens r   Z/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/test/unit/test_tgrep.pytest_tokenize_simple   s6    
z*TestSequenceFunctions.test_tokenize_simplec                 C   s   |  tdtd dS )zM
        Test that tokenization handles bytes and strs the same way.
        s%   A .. (B !< C . D) | ![<< (E , F) $ G]r   Nr   r   r   r   r   r   r   test_tokenize_encoding<   s    z,TestSequenceFunctions.test_tokenize_encodingc                 C   s  |  tddddg |  tddddg |  tddddg |  td	dd
dg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddddg |  tddd dg |  td!dd"dg |  td#dd$dg |  td%dd&dg |  td'dd(dg |  td)dd*dg |  td+dd,dg |  td-dd.dg |  td/dd0dg |  td1dd2dg |  td3dd4dg |  td5dd6dg |  td7dd8dg |  td9dd:dg |  td;dd<dg |  td=dd>dg |  td?dd@dg |  tdAddBddg |  tdCddBddg |  tdDddBddg |  tdEddBd
dg |  tdFddBddg |  tdGddBddg |  tdHddBddg |  tdIddBddg |  tdJddBddg |  tdKddBddg |  tdLddBddg |  tdMddBddg |  tdNddBddg |  tdOddBddg |  tdPddBd dg |  tdQddBd"dg |  tdRddBd$dg |  tdSddBd&dg |  tdTddBd(dg |  tdUddBd*dg |  tdVddBd,dg |  tdWddBd.dg |  tdXddBd0dg |  tdYddBd2dg |  tdZddBd4dg |  td[ddBd6dg |  td\ddBd8dg |  td]ddBd:dg |  td^ddBd<dg |  td_ddBd>dg |  td`ddBd@dg daS )bz8
        Test tokenization of basic link types.
        zA<Br   r   r	   zA>B>zA<3Bz<3zA>3Bz>3zA<,B<,zA>,Bz>,zA<-3Bz<-3zA>-3Bz>-3zA<-Bz<-zA>-Bz>-zA<'B<'zA>'Bz>'zA<:Bz<:zA>:Bz>:zA<<Br   zA>>B>>zA<<,Bz<<,zA>>,Bz>>,zA<<'Bz<<'zA>>'Bz>>'zA<<:B<<:zA>>:Bz>>:zA.Br   zA,Br   zA..Br   zA,,Bz,,zA$Br   zA$.Bz$.zA$,Bz$,zA$..B$..zA$,,Bz$,,zA!<Br
   zA!>BzA!<3BzA!>3BzA!<,BzA!>,BzA!<-3BzA!>-3BzA!<-BzA!>-BzA!<'BzA!>'BzA!<:BzA!>:BzA!<<BzA!>>BzA!<<,BzA!>>,BzA!<<'BzA!>>'BzA!<<:BzA!>>:BzA!.BzA!,BzA!..BzA!,,BzA!$BzA!$.BzA!$,BzA!$..BzA!$,,BNr    r!   r   r   r   test_tokenize_link_typesE   s|    z.TestSequenceFunctions.test_tokenize_link_typesc                 C   sh  |  tddddg |  tddg |  tdddddd	g |  td
dddddd	g |  tddddddddddd	dg |  tddddddd	dg |  tddddddddddddg |  tddddddddddg	 |  tddddddddddddg |  td dddddddddg	 |  td!dddd"ddg d#S )$zJ
        Test tokenization of the TGrep2 manual example patterns.
        NP < PPNPr   ZPP/^NP/NP << PP . VPr   r   ZVPNP << PP | . VPr   NP !<< PP [> NP | >> VP]r
   r   r#   r&   r   NP << (PP . VP)r   r   NP <' (PP <, (IN < on))r%   r$   INonS < (A < B) < CSr   r	   r   S < ((A < B) < C)S < (A < B < C)zA<B&.C&Nr    r!   r   r   r   test_tokenize_examples   sF      z,TestSequenceFunctions.test_tokenize_examplesc              
   C   s$   |  tddddddddg dS )	z/
        Test tokenization of quoting.
        z"A<<:B"<<:"A $.. B"<"A>3B"<Cz"A<<:B"r'   z	"A $.. B"r   z"A>3B"r   Nr    r!   r   r   r   test_tokenize_quoting   s    z+TestSequenceFunctions.test_tokenize_quotingc              	   C   s   |  tddg |  tddg |  tddg |  tddg |  tdddg |  tddd	d
dg |  tddd	d
d	dg |  tddd	d
d	d
dg dS )z2
        Test tokenization of node names.
        ZRobertz	/^[Bb]ob/*__zN()zN(r   zN(0,)0r   zN(0,0)zN(0,0,)Nr    r!   r   r   r   test_tokenize_nodenames   s     z-TestSequenceFunctions.test_tokenize_nodenamesc                 C   s>   |  tddddddddddd	d
ddddddd
ddg dS )z9
        Test tokenization of macro definitions.
        z4@ NP /^NP/;
@ NN /^NN/;
@NP [!< NP | < @NN] !$.. @NN@r+   r,   ;NNz/^NN/z@NPr   r
   r   r   z@NNr   r(   Nr    r!   r   r   r   test_tokenize_macros   s4    z*TestSequenceFunctions.test_tokenize_macrosc                 C   sx   t d}| ttd|gddgg | ttd|g|d |d gg | ttd|gdddgg dS )z`
        Test a simple use of tgrep for finding nodes matching a given
        pattern.
        A(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))rA   r      rE      NN|JJr   rG   Nr   
fromstringr   listr   tgrep_positionsZtgrep_nodesr   treer   r   r   test_node_simple   s       
z&TestSequenceFunctions.test_node_simplec                 C   s^   t d}| ttd|gttd|g | ttd|gttd|g dS )z9Test that the tgrep print operator ' is properly ignored.(S (n x) (N x))Nz'Nz/[Nn]/z'/[Nn]/Nr   rK   r   rL   r   rM   rN   r   r   r   test_node_printing   s    
z(TestSequenceFunctions.test_node_printingc                 C   s   t d}| ttd|gttd|g | ttd|gttd|g | ttd|gttd|g dS )z]
        Test that tgrep search strings handles bytes and strs the same
        way.
        rC   s   NNrA   s   NN|JJrH   NrJ   rN   r   r   r   test_node_encoding  s    z(TestSequenceFunctions.test_node_encodingc                 C   sL   t d}| ttd|gdgg | ttd|gddgg dS )zI
        Test selecting nodes using case insensitive node names.
        rQ   "N"rG   zi@"N"r   NrS   rN   r   r   r   test_node_nocase  s    
z&TestSequenceFunctions.test_node_nocasec                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gd	gg d
S )z?
        Test selecting nodes using quoted node names.
        z(N ("N" x) (N" x) ("\" x))rV   r   z"\"N\""rX   z"N\""rW   z"\"\\\""rE   NrS   rN   r   r   r   test_node_quoted   s
    
z&TestSequenceFunctions.test_node_quotedc                 C   s.   t d}| ttd|gddgg dS )/
        Test regex matching on nodes.
        $(S (NP-SBJ x) (NP x) (NNP x) (VP x))r,   rX   rW   NrS   rN   r   r   r   test_node_regex*  s    
z%TestSequenceFunctions.test_node_regexc                 C   sP   t d}| ttd|gddgg | ttd|gdddgg dS )r\   z(S (SBJ x) (SBJ1 x) (NP-SBJ x))z/^SBJ/rX   rW   z/SBJ/rZ   NrS   rN   r   r   r   test_node_regex_23  s    
  
z'TestSequenceFunctions.test_node_regex_2c                    s   t dfddtt D   fdd D }|D ]H}d| }tt|g}| 	t|d d | 	|d d | qBd	S )
zE
        Test matching on nodes based on NLTK tree position.
        r]   c                    s   h | ]}  |qS r   )Zleaf_treeposition.0x)rO   r   r   	<setcomp>E  s     z@TestSequenceFunctions.test_node_tree_position.<locals>.<setcomp>c                    s   g | ]}| kr|qS r   r   r`   )leaf_positionsr   r   
<listcomp>F  s      zATestSequenceFunctions.test_node_tree_position.<locals>.<listcomp>rR   r   rG   N)
r   rK   rangelenleavesZtreepositionsrL   r   rM   r   )r   Ztree_positionspositionZnode_idrM   r   )rd   rO   r   test_node_tree_position?  s    

z-TestSequenceFunctions.test_node_tree_positionc                 C   sL   t d}| ttd|gddgg | ttd|gdg g dS )zS
        Test node name matching with the search_leaves flag set to False.
        (S (A (T x)) (B (N x)))rb   r   r   r   rG   r   r   FNrS   rN   r   r   r   test_node_noleavesM  s    
 z(TestSequenceFunctions.test_node_noleavesc                 C   s@  t d}| ttd|gdgg | ttd|gdgg | ttd|gdddd	d
dgg | ttd|gd	gg | ttd|gdgg | ttd|gd
gg | ttd|gddddd	dgg | ttd|gddd	gg | ttd|gddd	d
gg | ttd|gddgg | ttd|gd	d
gg | ttd|gddgg | ttd|gdgg | ttd|gd	gg | ttd|gddd	d
dgg t d}| ttd|gdgg | ttd|gdd	gg | ttd|gdddd	d
dddgg | ttd|gd	gg t d}| ttd|gd
gg | ttd |gddddd!d"d	dgg t d#}| ttd$|gdddd%d&d'd(gg | ttd)|gddd%d&d*d
dgg d+S ),zC
        Test matching nodes based on dominance relations.
        rk   z* < TrX   z	* < T > Sz* !< Tr   r   r   rl   rW   )rG   r   rm   z
* !< T > Sz* > Az* > Bz* !> Bz* !> B >> Sz* >> Sz* >>, Sz* >>' Sz* << Tz* <<' Tz* <<1 Nz* !<< Tz(S (A (T x)) (B (T x) (N x )))z* <: Tz* !<: T)rG   rG   )rG   rG   r   z* !<: T > Sz(S (T (A x) (B x)) (T (C x)))z* >: Tz* !>: TrI   r   rG   r   z=(S (A (B (C (D (E (T x)))))) (A (B (C (D (E (T x))) (N x)))))z* <<: T)r   r   r   r   )r   r   r   r   r   rG   r   r   r   )rG   r   r   r   r   z* >>: A)r   r   r   r   r   r   NrS   rN   r   r   r   tests_rel_dominanceW  s    
 
   
 
z)TestSequenceFunctions.tests_rel_dominancec                 C   s(   t d}| tjttd|g dS )zC
        Test error handling of undefined tgrep operators.
        rk   z* >>> SN)r   rK   assertRaisesr   TgrepExceptionrL   rM   rN   r   r   r   test_bad_operator  s    
  z'TestSequenceFunctions.test_bad_operatorc                 C   sV   t d}d}| tt||gddgg d}| tt||gddgg dS )z`
        Test that comments are correctly filtered out of tgrep search
        strings.
        z(S (NN x) (NP x) (NN x))z=
        @ NP /^NP/;
        @ NN /^NN/;
        @NN
        rX   rZ   zg
        # macros
        @ NP /^NP/;
        @ NN /^NN/;

        # search string
        @NN
        NrS   )r   rO   Zsearch1Zsearch2r   r   r   test_comments  s
    
 z#TestSequenceFunctions.test_commentsc                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gddgg d	S )
z7
        Test matching sister nodes in a tree.
        (S (A x) (B x) (C x))z* $. BrX   z* $.. Bz* $, BrZ   z* $,, Bz* $ BNrS   rN   r   r   r   test_rel_sister_nodes  s    
z+TestSequenceFunctions.test_rel_sister_nodesc                 C   s  t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd	|gdgg | ttd
|gdgg | ttd|gdgg | ttd|gdgg t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg dS )zP
        Test matching nodes based on their index in their parent node.
        rw   z* >, SrX   z* >1 Sz* >2 SrW   z* >3 SrZ   z* >' Sz* >-1 Sz* >-2 Sz* >-3 SzE(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))z* <, Az* <1 Az* <2 Az* <3 Az* <' Az* <-1 Az* <-2 Az* <-3 ANrS   rN   r   r   r   tests_rel_indexed_children  s(    
z0TestSequenceFunctions.tests_rel_indexed_childrenc              
   C   s0  t d}| ttd|gdddgg | ttd|gddgg | ttd	|gdd
dddgg | ttd|gdd
dddddgg | ttd|gddgg | ttd|gddddgg | ttd|gddddddgg | ttd|gddddgg dS )zD
        Test matching nodes based on precedence relations.
        zV(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (X (PP x)) (Y (AP x)))) (NP (RC (NP (AP x)))))z* . XrX   rI   rp   z* . Yrm   rq   z* .. Xro   rl   z* .. Yz* , XrG   r   rG   )rG   r   rG   r   z* , YrZ   )rE   r   )rE   r   r   )rE   r   r   r   z* ,, Xz* ,, YNrS   rN   r   r   r   test_rel_precedence  sF     
  z)TestSequenceFunctions.test_rel_precedencec                 C   sf  t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gddgg t d	}| ttd
|gddgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg | ttd|gdgg dS )zA
        Test the Basic Examples from the TGrep2 manual.
        z(S (NP (AP x)) (NP (PP x)))r*   rW   z$(S (NP x) (VP x) (NP (PP x)) (VP x))r-   rZ   z6(S (NP (AP x)) (NP (PP x)) (NP (DET x) (NN x)) (VP x))r.   zX(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (NP (PP x)) (NP (AP x)))) (NP (RC (NP (AP x)))))r/   rI   rz   z:(S (NP (AP (PP x) (VP x))) (NP (AP (PP x) (NP x))) (NP x))r0   rX   ze(S (NP (DET a) (NN cat) (PP (IN on) (NP x))) (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x)) (NP x))r1   z;(S (S (C x) (A (B x))) (S (C x) (A x)) (S (D x) (A (B x))))r4   z/(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))r6   r7   NrS   rN   r   r   r   test_examples  sd    

      z#TestSequenceFunctions.test_examplesc                 C   sH   t d}| ttd|gddgg | tjttd|g dS )z8
        Test defining and using tgrep2 macros.
        zi(VP (VB sold) (NP (DET the) (NN heiress)) (NP (NN deed) (PREP to) (NP (DET the) (NN school) (NN house))))z+@ NP /^NP/;
@ NN /^NN/;
@NP !< @NP !$.. @NNrW   )rE   rE   z,@ NP /^NP/;
@ NN /^NN/;
@CNP !< @NP !$.. @NNN)r   rK   r   rL   r   rM   rs   rt   rN   r   r   r   test_use_macrosg  s(     	 z%TestSequenceFunctions.test_use_macrosc                 C   sd   |  tdddddddddddd	d
d
g |  tdddddddddddddddd	d
d
g dS )z#Test tokenization of labeled nodes.!S < @SBJ < (@VP < (@VB $.. @OBJ))r5   r   @SBJr   @VP@VBr(   @OBJr   z%S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))=svNr    r!   r   r   r   test_tokenize_node_labels  sL    z/TestSequenceFunctions.test_tokenize_node_labelsc                 C   s@   |  tddddddddddd	ddd
ddddddddg dS )z(Test tokenization of segmented patterns.z0S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =vr5   r   r   r   r   r   r   r   r   r(   r   r   :z=sr   z=vNr    r!   r   r   r    test_tokenize_segmented_patterns  s2    z6TestSequenceFunctions.test_tokenize_segmented_patternsc                 C   s*  d}t d}t d}|dd d }d}| tt||gd  | tt||gd  | tt||gd  | tt||gtt||g | tt||gd  | tt||gd  | tt||gd  | tt||gtt||g dS )	zN
        Test labeled nodes.

        Test case from Emily M. Bender.
        z
            # macros
            @ SBJ /SBJ/;
            @ VP /VP/;
            @ VB /VB/;
            @ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =vz2(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))z2(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))z

r   r~   z-S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))N)	r   rK   split
assertTruerL   r   rM   r   ZassertFalse)r   searchZsent1Zsent2Zsearch_firsthalfZsearch_rewriter   r   r   test_labeled_nodes  s.    
z(TestSequenceFunctions.test_labeled_nodesc                 C   sL   t d}| ttd|gdgg | ttd|gddgg dS )zm
        Test that multiple (3 or more) conjunctions of node relations are
        handled properly.
        z'((A (B b) (C c)) (A (B b) (C c) (D d)))z(A < B < C < D)rW   z(A < B < C)rX   NrS   )r   sentr   r   r   test_multiple_conjs  s    
  z)TestSequenceFunctions.test_multiple_conjsc                 C   sn   t d}| ttd|gddgg | ttd|gddgg | ttd|gddgg dS )zp
        Test that semicolons at the end of a tgrep2 search string won't
        cause a parse failure.
        rC   rA   rD   rF   zNN;zNN;;NrS   rN   r   r   r   test_trailing_semicolon   s       z-TestSequenceFunctions.test_trailing_semicolonN)!__name__
__module____qualname____doc__r   r"   r)   r9   r:   r>   rB   rP   rT   rU   rY   r[   r^   r_   rj   rn   rr   ru   rv   rx   ry   r{   r|   r}   r   r   r   r   r   r   r   r   r   r      s<   !	C&	 
	
W	'O+(r   )r   ZunittestZnltkr   Z	nltk.treer   ZTestCaser   r   r   r   r   <module>
   s   