U
    e~                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ z4d dlm	Z	 d dl
mZ d dlmZ d dlmZ W n ek
r|   Y nX d d	lmZmZmZ G d
d dZG dd dZG dd deZdd ZdS )    N)deepcopy)
itemgetter)remove)array)sparse)svm)load_svmlight_file)DependencyEvaluatorDependencyGraphParserIc                   @   s2   e Zd ZdZdd Zdd ZdddZd	d
 ZdS )Configurationa  
    Class for holding configuration which is the partial analysis of the input sentence.
    The transition based parser aims at finding set of operators that transfer the initial
    configuration to the terminal configuration.

    The configuration includes:
        - Stack: for storing partially proceeded words
        - Buffer: for storing remaining input words
        - Set of arcs: for storing partially built dependency tree

    This class also provides a method to represent a configuration as list of features.
    c                 C   s<   dg| _ ttdt|j| _g | _|j| _t| j| _dS )z
        :param dep_graph: the representation of an input in the form of dependency graph.
        :type dep_graph: DependencyGraph where the dependencies are not specified.
        r      N)	stacklistrangelennodesbufferarcs_tokens_max_address)selfZ	dep_graph r   \/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/parse/transitionparser.py__init__(   s
    zConfiguration.__init__c                 C   s*   dt | j d t | j d t | j S )NzStack : z  Buffer : z
   Arcs : )strr   r   r   )r   r   r   r   __str__4   s    zConfiguration.__str__Fc                 C   s0   |dkrdS |dkrdS |dkr,|dkr,dS dS )zs
        Check whether a feature is informative
        The flag control whether "_" is informative or not
        NF _Tr   )r   featflagr   r   r   _check_informative>   s    z Configuration._check_informativec                 C   s.  g }t | jdkr| jt | jd  }| j| }| |d drT|d|d   d|kr|| |d r||d|d   | |d r|d	|d   d
|kr| |d
 r|d
 d}|D ]}|d|  qt | jdkr(| jt | jd  }| j| }| |d r(|d|d   d}d}d}	d}
| jD ]N\}}}||kr>||krn||krn|}|}
||k r>||k r>|}|}	q>| |	r|d|	  | |
r|d|
  t | jdkr*| jd }| j| }| |d dr
|d|d   d|kr6| |d r6|d|d   | |d rX|d|d   d
|kr| |d
 r|d
 d}|D ]}|d|  qt | jdkr| jd }| j| }| |d dr|d|d   | |d r|d|d   t | jdkrJ| jd }| j| }| |d rJ|d|d   t | jdkr| jd }| j| }| |d r|d|d   d}d}d}	d}
| jD ]N\}}}||kr||kr||kr|}|}
||k r||k r|}|}	q| |	r|d|	  | |
r*|d|
  |S )a/  
        Extract the set of features for the current configuration. Implement standard features as describe in
        Table 3.2 (page 31) in Dependency Parsing book by Sandra Kubler, Ryan McDonal, Joakim Nivre.
        Please note that these features are very basic.
        :return: list(str)
        r   r   wordTZSTK_0_FORM_ZlemmaZSTK_0_LEMMA_tagZ
STK_0_POS_feats|ZSTK_0_FEATS_   Z
STK_1_POS_i@B r   ZSTK_0_LDEP_ZSTK_0_RDEP_ZBUF_0_FORM_ZBUF_0_LEMMA_Z
BUF_0_POS_ZBUF_0_FEATS_ZBUF_1_FORM_Z
BUF_1_POS_Z
BUF_2_POS_   Z
BUF_3_POS_ZBUF_0_LDEP_ZBUF_0_RDEP_)r   r   r   r!   appendsplitr   r   )r   resultZ
stack_idx0tokenr$   r   Z
stack_idx1Z	left_mostZ
right_mostZdep_left_mostZdep_right_mostZwirZwjZbuffer_idx0Zbuffer_idx1Zbuffer_idx2Zbuffer_idx3r   r   r   extract_featuresL   s    











zConfiguration.extract_featuresN)F)__name__
__module____qualname____doc__r   r   r!   r.   r   r   r   r   r      s
   

r   c                   @   sH   e Zd ZdZdZdZdZdZdd Zdd	 Z	d
d Z
dd Zdd ZdS )
Transitionz
    This class defines a set of transition which is applied to a configuration to get another configuration
    Note that for different parsing algorithm, the transition is different.
    ZLEFTARCZRIGHTARCSHIFTREDUCEc                 C   s.   || _ |tjtjfkr*tdtjtjf dS )z
        :param alg_option: the algorithm option of this parser. Currently support `arc-standard` and `arc-eager` algorithm
        :type alg_option: str
        % Currently we only support %s and %s N)_algoTransitionParserARC_STANDARD	ARC_EAGER
ValueError)r   Z
alg_optionr   r   r   r      s    
zTransition.__init__c           	      C   s   t |jdkst |jdkr dS |jd dkr2dS |jt |jd  }d}| jtjkrt|jD ]\}}}||kr\d}q\|r|j  |jd }|j|||f ndS dS )a  
        Note that the algorithm for left-arc is quite similar except for precondition for both arc-standard and arc-eager

        :param configuration: is the current configuration
        :return: A new configuration or -1 if the pre-condition is not satisfied
        r   r'   r   TFN)	r   r   r   r7   r8   r:   r   popr)   )	r   confrelationidx_wir    
idx_parentr-   	idx_childidx_wjr   r   r   left_arc   s    

zTransition.left_arcc                 C   s   t |jdkst |jdkr dS | jtjkr^|j }|jd }||jd< |j|||f n>|jt |jd  }|jd}|j| |j|||f dS )z
        Note that the algorithm for right-arc is DIFFERENT for arc-standard and arc-eager

        :param configuration: is the current configuration
        :return: A new configuration or -1 if the pre-condition is not satisfied
        r   r'   r   N)	r   r   r   r7   r8   r9   r<   r   r)   )r   r=   r>   r?   rB   r   r   r   	right_arc   s    


zTransition.right_arcc                 C   sp   | j tjkrdS t|jdkr"dS |jt|jd  }d}|jD ]\}}}||kr@d}q@|rh|j  ndS dS )z
        Note that the algorithm for reduce is only available for arc-eager

        :param configuration: is the current configuration
        :return: A new configuration or -1 if the pre-condition is not satisfied
        r'   r   r   FTN)r7   r8   r:   r   r   r   r<   )r   r=   r?   r    r@   r-   rA   r   r   r   reduce   s    zTransition.reducec                 C   s.   t |jdkrdS |jd}|j| dS )z
        Note that the algorithm for shift is the SAME for arc-standard and arc-eager

        :param configuration: is the current configuration
        :return: A new configuration or -1 if the pre-condition is not satisfied
        r   r'   N)r   r   r<   r   r)   )r   r=   r?   r   r   r   shift  s    zTransition.shiftN)r/   r0   r1   r2   LEFT_ARC	RIGHT_ARCr4   r5   r   rC   rD   rE   rF   r   r   r   r   r3      s   r3   c                   @   sb   e Zd ZdZdZdZdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd ZdddZdd ZdS )r8   zl
    Class for transition based parser. Implement 2 algorithms which are "arc-standard" and "arc-eager"
    zarc-standardz	arc-eagerc                 C   s@   || j | jfkr$td| j | jf || _i | _i | _i | _dS )z
        :param algorithm: the algorithm option of this parser. Currently support `arc-standard` and `arc-eager` algorithm
        :type algorithm: str
        r6   N)r9   r:   r;   
_algorithm_dictionary_transition_match_transition)r   	algorithmr   r   r   r   +  s    
zTransitionParser.__init__c                 C   sD   |j | }|j | }|d d kr$d S |d |d kr<|d S d S d S )Nr"   headaddressrel)r   )r   r@   rA   depgraphZp_nodec_noder   r   r   _get_dep_relation;  s    

z"TransitionParser._get_dep_relationc                 C   sJ   g }|D ](}| j |t| j  || j |  qddd t|D S )z
        :param features: list of feature string which is needed to convert to binary features
        :type features: list(str)
        :return : string of binary features in libsvm format  which is 'featureID:value' pairs
         c                 s   s   | ]}t |d  V  qdS )z:1.0N)r   ).0Z	featureIDr   r   r   	<genexpr>S  s    z?TransitionParser._convert_to_binary_features.<locals>.<genexpr>)rJ   
setdefaultr   r)   joinsorted)r   featuresZunsorted_resultfeaturer   r   r   _convert_to_binary_featuresG  s    
z,TransitionParser._convert_to_binary_featuresc           
      C   s   g }|j D ]<}|j | }d|kr
|d }|d }|d k	r
|||f q
|D ]\}}||krh|}|}|}t|d |D ]T}tt|j D ]@}	|	|k s|	|kr||	f|kr   dS |	|f|kr   dS qqvqLdS )NrN   rO   r   FT)r   r)   r   r   )
r   rQ   Zarc_listkeynodeZchildIdxZ	parentIdxtempkmr   r   r   _is_projectiveW  s*    


zTransitionParser._is_projectivec                 C   sV   | j |t| j d  || j| j | < t| j | d | d }||d dS )z^
        write the binary features to input file and update the transition dictionary
        r   rT   
zutf-8N)rK   rW   r   rL   r   writeencode)r   r]   binary_features
input_fileZ	input_strr   r   r   _write_to_fileq  s    zTransitionParser._write_to_filec                 C   s  t | j}d}g }|D ]}| |s(q|d7 }t|}t|jdkr|jd }| }	| |	}
t|jdkrx|jt|jd  }| 	|||}|dk	rt j
d | }| ||
| ||| || q8| 	|||}|dk	rxd}|j}t|d D ]>}||k r| 	|||}|dk	 r|||f|jk rd} q|rxt jd | }| ||
| ||| || q8t j}| ||
| || || q8qtdtt|  tdt|  |S )	z
        Create the training example in the libsvm format and write it to the input_file.
        Reference : Page 32, Chapter 3. Dependency Parsing by Sandra Kubler, Ryan McDonal and Joakim Nivre (2009)
        r   r   N:TF Number of training examples : ) Number of valid (projective) examples : )r3   r9   rb   r   r   r   r.   r\   r   rS   rG   rh   rC   r)   r   r   r   rH   rD   r4   rF   printr   )r   	depgraphsrg   	operationZ
count_projtraining_seqrQ   r=   b0rZ   rf   s0rP   r]   preconditionZmaxIDwZrelwr   r   r   !_create_training_examples_arc_std{  sX    










z2TransitionParser._create_training_examples_arc_stdc                 C   s  t | j}d}g }|D ]}| |s(q|d7 }t|}t|jdkr|jd }| }	| |	}
t|jdkr|jt|jd  }| 	|||}|dk	rt j
d | }| ||
| ||| || q8| 	|||}|dk	rt jd | }| ||
| ||| || q8d}t|D ]6}| 	|||dk	rDd}| 	|||dk	r(d}q(|rt j}| ||
| || || q8t j}| ||
| || || q8qtdtt|  tdt|  |S )	z
        Create the training example in the libsvm format and write it to the input_file.
        Reference : 'A Dynamic Oracle for Arc-Eager Dependency Parsing' by Joav Goldberg and Joakim Nivre
        r   r   Nri   FTrj   rk   )r3   r:   rb   r   r   r   r.   r\   r   rS   rG   rh   rC   r)   rH   rD   r   r5   rE   r4   rF   rl   r   )r   rm   rg   rn   Z	countProjro   rQ   r=   rp   rZ   rf   rq   rP   r]   r    r`   r   r   r   #_create_training_examples_arc_eager  s^    










z4TransitionParser._create_training_examples_arc_eagerTc              	   C   s   ztjdt dd}| j| jkr0| || n| || |	  t
|j\}}tjddddd|d	d
}||| t|t|d W 5 t |j X dS )z
        :param depgraphs : list of DependencyGraph as the training data
        :type depgraphs : DependencyGraph
        :param modelfile : file name to save the trained model
        :type modelfile : str
        ztransition_parse.trainF)prefixdirdeleteZpolyr&   r   g?g      ?T)ZkernelZdegreeZcoef0gammaCverboseZprobabilitywbN)r   nametempfileNamedTemporaryFile
gettempdirrI   r9   rt   ru   closer   r   ZSVCfitpickledumpopen)r   rm   Z	modelfiler{   rg   Zx_trainZy_trainmodelr   r   r   train  s.      
zTransitionParser.trainc           !      C   sd  g }t t|d}t| j}|D ]:}t|}t|jdkr| }g }	g }
g }|D ]2}|| j	krX|	
| j	|  |

d |
d qXtt|	}t|
}t|}tj|||ffdt| j	fd}i }||d }tt|D ]}|| ||< qt| tddd}|D ]\}}|j| }|| jkr| j| }|dd }|tjkrv|||dd d	kr q0np|tjkr|||dd d	kr q0nB|tjkr||d	kr q0n |tjkr||d	kr q0ntd
qq0t|}|j D ] }|j | }d|d< d|d< q|j!D ]&\}}}|j | } || d< || d< q,|
| q"|S )aZ  
        :param depgraphs: the list of test sentence, each sentence is represented as a dependency graph where the 'head' information is dummy
        :type depgraphs: list(DependencyGraph)
        :param modelfile: the model file
        :type modelfile: str
        :return: list (DependencyGraph) with the 'head' and 'rel' information
        rbr   g      ?r   )shapeT)r]   reverseri   r'   z;The predicted transition is not recognized, expected errorsr   rP   rN   )"r   loadr   r3   rI   r   r   r   r.   rJ   r)   r   rY   r   Z
csr_matrixZpredict_probar   itemsr   Zclasses_rL   r*   rG   rC   rH   rD   r5   rE   r4   rF   r;   r   r   r   )!r   rm   Z	modelFiler+   r   rn   rQ   r=   rZ   colrowdatar[   Znp_colZnp_rowZnp_dataZx_testZ	prob_dictZ	pred_probiZsorted_ProbZ
y_pred_idx
confidenceZy_predZstrTransitionZbaseTransitionZnew_depgraphr]   r^   rN   rP   childrR   r   r   r   parse"  s    




 





zTransitionParser.parseN)T)r/   r0   r1   r2   r9   r:   r   rS   r\   rb   rh   rt   ru   r   r   r   r   r   r   r8   "  s   
>@
)r8   c                   C   s   dS )a6  
    >>> from nltk.parse import DependencyGraph, DependencyEvaluator
    >>> from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
    >>> gold_sent = DependencyGraph("""
    ... Economic  JJ     2      ATT
    ... news  NN     3       SBJ
    ... has       VBD       0       ROOT
    ... little      JJ      5       ATT
    ... effect   NN     3       OBJ
    ... on     IN      5       ATT
    ... financial       JJ       8       ATT
    ... markets    NNS      6       PC
    ... .    .      3       PU
    ... """)

    >>> conf = Configuration(gold_sent)

    ###################### Check the Initial Feature ########################

    >>> print(', '.join(conf.extract_features()))
    STK_0_POS_TOP, BUF_0_FORM_Economic, BUF_0_LEMMA_Economic, BUF_0_POS_JJ, BUF_1_FORM_news, BUF_1_POS_NN, BUF_2_POS_VBD, BUF_3_POS_JJ

    ###################### Check The Transition #######################
    Check the Initialized Configuration
    >>> print(conf)
    Stack : [0]  Buffer : [1, 2, 3, 4, 5, 6, 7, 8, 9]   Arcs : []

    A. Do some transition checks for ARC-STANDARD

    >>> operation = Transition('arc-standard')
    >>> operation.shift(conf)
    >>> operation.left_arc(conf, "ATT")
    >>> operation.shift(conf)
    >>> operation.left_arc(conf,"SBJ")
    >>> operation.shift(conf)
    >>> operation.shift(conf)
    >>> operation.left_arc(conf, "ATT")
    >>> operation.shift(conf)
    >>> operation.shift(conf)
    >>> operation.shift(conf)
    >>> operation.left_arc(conf, "ATT")

    Middle Configuration and Features Check
    >>> print(conf)
    Stack : [0, 3, 5, 6]  Buffer : [8, 9]   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7)]

    >>> print(', '.join(conf.extract_features()))
    STK_0_FORM_on, STK_0_LEMMA_on, STK_0_POS_IN, STK_1_POS_NN, BUF_0_FORM_markets, BUF_0_LEMMA_markets, BUF_0_POS_NNS, BUF_1_FORM_., BUF_1_POS_., BUF_0_LDEP_ATT

    >>> operation.right_arc(conf, "PC")
    >>> operation.right_arc(conf, "ATT")
    >>> operation.right_arc(conf, "OBJ")
    >>> operation.shift(conf)
    >>> operation.right_arc(conf, "PU")
    >>> operation.right_arc(conf, "ROOT")
    >>> operation.shift(conf)

    Terminated Configuration Check
    >>> print(conf)
    Stack : [0]  Buffer : []   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7), (6, 'PC', 8), (5, 'ATT', 6), (3, 'OBJ', 5), (3, 'PU', 9), (0, 'ROOT', 3)]


    B. Do some transition checks for ARC-EAGER

    >>> conf = Configuration(gold_sent)
    >>> operation = Transition('arc-eager')
    >>> operation.shift(conf)
    >>> operation.left_arc(conf,'ATT')
    >>> operation.shift(conf)
    >>> operation.left_arc(conf,'SBJ')
    >>> operation.right_arc(conf,'ROOT')
    >>> operation.shift(conf)
    >>> operation.left_arc(conf,'ATT')
    >>> operation.right_arc(conf,'OBJ')
    >>> operation.right_arc(conf,'ATT')
    >>> operation.shift(conf)
    >>> operation.left_arc(conf,'ATT')
    >>> operation.right_arc(conf,'PC')
    >>> operation.reduce(conf)
    >>> operation.reduce(conf)
    >>> operation.reduce(conf)
    >>> operation.right_arc(conf,'PU')
    >>> print(conf)
    Stack : [0, 3, 9]  Buffer : []   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (0, 'ROOT', 3), (5, 'ATT', 4), (3, 'OBJ', 5), (5, 'ATT', 6), (8, 'ATT', 7), (6, 'PC', 8), (3, 'PU', 9)]

    ###################### Check The Training Function #######################

    A. Check the ARC-STANDARD training
    >>> import tempfile
    >>> import os
    >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False)

    >>> parser_std = TransitionParser('arc-standard')
    >>> print(', '.join(parser_std._create_training_examples_arc_std([gold_sent], input_file)))
     Number of training examples : 1
     Number of valid (projective) examples : 1
    SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, SHIFT, SHIFT, LEFTARC:ATT, SHIFT, SHIFT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, RIGHTARC:ATT, RIGHTARC:OBJ, SHIFT, RIGHTARC:PU, RIGHTARC:ROOT, SHIFT

    >>> parser_std.train([gold_sent],'temp.arcstd.model', verbose=False)
     Number of training examples : 1
     Number of valid (projective) examples : 1
    >>> input_file.close()
    >>> remove(input_file.name)

    B. Check the ARC-EAGER training

    >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(),delete=False)
    >>> parser_eager = TransitionParser('arc-eager')
    >>> print(', '.join(parser_eager._create_training_examples_arc_eager([gold_sent], input_file)))
     Number of training examples : 1
     Number of valid (projective) examples : 1
    SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, RIGHTARC:ROOT, SHIFT, LEFTARC:ATT, RIGHTARC:OBJ, RIGHTARC:ATT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, REDUCE, REDUCE, REDUCE, RIGHTARC:PU

    >>> parser_eager.train([gold_sent],'temp.arceager.model', verbose=False)
     Number of training examples : 1
     Number of valid (projective) examples : 1

    >>> input_file.close()
    >>> remove(input_file.name)

    ###################### Check The Parsing Function ########################

    A. Check the ARC-STANDARD parser

    >>> result = parser_std.parse([gold_sent], 'temp.arcstd.model')
    >>> de = DependencyEvaluator(result, [gold_sent])
    >>> de.eval() >= (0, 0)
    True

    B. Check the ARC-EAGER parser
    >>> result = parser_eager.parse([gold_sent], 'temp.arceager.model')
    >>> de = DependencyEvaluator(result, [gold_sent])
    >>> de.eval() >= (0, 0)
    True

    Remove test temporary files
    >>> remove('temp.arceager.model')
    >>> remove('temp.arcstd.model')

    Note that result is very poor because of only one training example.
    Nr   r   r   r   r   demo  s    r   )r   r~   copyr   operatorr   osr   numpyr   Zscipyr   Zsklearnr   Zsklearn.datasetsr   ImportErrorZ
nltk.parser	   r
   r   r   r3   r8   r   r   r   r   r   <module>	   s&    o  m