U
    epK                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dZG dd	 d	eZG d
d deZG dd deZG dd deZdS )    N)PIPE)_java_optionsconfig_javafind_jar_iterfind_jars_within_pathjava)ParserIDependencyGraph)Treez2https://nlp.stanford.edu/software/lex-parser.shtmlc                   @   sr   e Zd ZdZdZdZdZdZdZdddZ	dd Z
dddZdddZdddZdddZd ddZd!ddZdS )"GenericStanfordParserz Interface to the Stanford Parserz+stanford-parser-(\d+)(\.(\d+))+-models\.jarzstanford-parser\.jarz3edu.stanford.nlp.parser.lexparser.LexicalizedParserFN4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzutf8-mx4g c              
   C   s   t t| j|ddt|dddd d}t t| j|ddt|ddd	d d}	tj|d
 }
t|	gt	|
 | _
|| _|| _|| _|| _d S )N)ZSTANFORD_PARSERSTANFORD_CORENLP T)Zenv_varsZ
searchpathurlverboseZis_regexc                 S   s   t j| S Nospathdirname
model_pathr   r   T/var/www/html/assets/scripts/venv/lib/python3.8/site-packages/nltk/parse/stanford.py<lambda><       z0GenericStanfordParser.__init__.<locals>.<lambda>)key)ZSTANFORD_MODELSr   c                 S   s   t j| S r   r   r   r   r   r   r   I   r   r   )maxr   _JAR_stanford_url_MODEL_JAR_PATTERNr   r   splittupler   
_classpathr   	_encodingcorenlp_optionsjava_options)selfZpath_to_jarZpath_to_models_jarr   encodingr   r)   r(   Zstanford_jarZ	model_jarZstanford_dirr   r   r   __init__&   s<    		zGenericStanfordParser.__init__c              	   C   s   g }g }g }d}| dD ]~}|dkr|rB|t| g }d}q| jrh|| d| g }d}q|t| d|g g }q|| d}qt|S )NFr   
T)
splitlinesappenditer_DOUBLE_SPACED_OUTPUT
_make_treejoin)r*   Zoutput_resZ	cur_linesZ	cur_treesblankliner   r   r   _parse_trees_outputW   s&    
z)GenericStanfordParser._parse_trees_outputc              
   C   sB   | j d| jddd| jdddg
}| | |dd	d
 |D |S )a  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list where each sentence is a list of words.
        Each sentence will be automatically tagged with this StanfordParser instance's
        tagger.
        If whitespaces exists inside a token, then the token will be treated as
        separate tokens.

        :param sentences: Input sentences to parse
        :type sentences: list(list(str))
        :rtype: iter(iter(Tree))
        -model
-sentencesnewline-outputFormat
-tokenizedz-escaperz-edu.stanford.nlp.process.PTBEscapingProcessorr-   c                 s   s   | ]}d  |V  qdS ) Nr3   .0sentencer   r   r   	<genexpr>   s     z4GenericStanfordParser.parse_sents.<locals>.<genexpr>_MAIN_CLASSr   _OUTPUT_FORMATr7   _executer3   r*   	sentencesr   cmdr   r   r   parse_sentsn   s$      z!GenericStanfordParser.parse_sentsc                 C   s   t | |g|S )a&  
        Use StanfordParser to parse a sentence. Takes a sentence as a string;
        before parsing, it will be automatically tokenized and tagged by
        the Stanford Parser.

        :param sentence: Input sentence to parse
        :type sentence: str
        :rtype: iter(Tree)
        )nextraw_parse_sentsr*   rA   r   r   r   r   	raw_parse   s    
zGenericStanfordParser.raw_parsec                 C   s2   | j d| jddd| jg}| | |d||S )aI  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list of strings.
        Each sentence will be automatically tokenized and tagged by the Stanford Parser.

        :param sentences: Input sentences to parse
        :type sentences: list(str)
        :rtype: iter(iter(Tree))
        r8   r9   r:   r;   r-   rC   rG   r   r   r   rL      s    	z%GenericStanfordParser.raw_parse_sentsc                 C   s   t | |g|S )a0  
        Use StanfordParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :rtype: iter(Tree)
        )rK   tagged_parse_sentsrM   r   r   r   tagged_parse   s    
z"GenericStanfordParser.tagged_parsec                    sR   d | j d| jddd| jdd dd	d
dg}| | |d fdd|D |S )ad  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences
        where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :rtype: iter(iter(Tree))
        /r8   r9   r:   r;   r<   z-tagSeparatorz-tokenizerFactoryz,edu.stanford.nlp.process.WhitespaceTokenizerz-tokenizerMethodZnewCoreLabelTokenizerFactoryr-   c                 3   s&   | ]}d   fdd|D V  qdS )r=   c                 3   s   | ]}  |V  qd S r   r>   )r@   ZtaggedZtag_separatorr   r   rB      s     zEGenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>.<genexpr>Nr>   r?   rR   r   r   rB      s   z;GenericStanfordParser.tagged_parse_sents.<locals>.<genexpr>rC   rG   r   rR   r   rO      s2    
z(GenericStanfordParser.tagged_parse_sentsc           	   	   C   s  | j }|d|g | jr*|| j  dt}t| j|d tj	ddd}t
|trj|rj||}|| |  | jr|d t|| j|ttd\}}n"||j t|| jttd	\}}|d
d}|dd}||}W 5 Q R X t|j t|dd |S )Nz	-encodingr=   )optionsr   wbF)modedeleter   )	classpathstdinstdoutstderr)rW   rY   rZ   s        s    )r'   extendr(   r$   r3   r   r   r)   tempfileNamedTemporaryFile
isinstancestrencodewriteflush
_USE_STDINseekr   r&   r   r/   namereplacedecoder   unlink)	r*   rI   input_r   r+   Zdefault_optionsZ
input_filerY   rZ   r   r   r   rF      sB    



   
zGenericStanfordParser._execute)NNr   r   Fr   r   )F)F)F)F)F)F)__name__
__module____qualname____doc__r#   r!   rD   rd   r1   r,   r7   rJ   rN   rL   rP   rO   rF   r   r   r   r   r      s*          
1




'r   c                       s,   e Zd ZdZdZ fddZdd Z  ZS )StanfordParsera  
    >>> parser=StanfordParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
    ...     "the quick brown fox jumps over the lazy dog",
    ...     "the quick grey wolf jumps over the lazy fox"
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
    [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
    [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
    Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
    Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
    [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
    [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
    Zpennc                    s"   t jdtdd t j|| d S )NzcThe StanfordParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPParser[0m instead.   
stacklevelwarningswarnDeprecationWarningsuperr,   r*   argskwargs	__class__r   r   r,   G  s    zStanfordParser.__init__c                 C   s
   t |S r   )r   
fromstringr*   resultr   r   r   r2   Q  s    zStanfordParser._make_treerk   rl   rm   rn   rE   r,   r2   __classcell__r   r   r{   r   ro     s   2
ro   c                       s,   e Zd ZdZdZ fddZdd Z  ZS )StanfordDependencyParsera
  
    >>> dep_parser=StanfordDependencyParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... ) # doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
    Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]

    >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    Z	conll2007c                    s"   t jdtdd t j|| d S )NzwThe StanfordDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.rp   rq   rs   rx   r{   r   r   r,     s    z!StanfordDependencyParser.__init__c                 C   s   t |ddS )NrootZtop_relation_labelr	   r~   r   r   r   r2     s    z#StanfordDependencyParser._make_treer   r   r   r{   r   r   U  s   0
r   c                       sJ   e Zd ZdZdZdZdZdZdZdZ	 fddZ
dd
dZdd Z  ZS )StanfordNeuralDependencyParserar  
    >>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP
    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
    (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
    u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
    ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
    (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
    u'punct', (u'.', u'.'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
    'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
    Tree('fox', ['over', 'the', 'lazy']), '.'])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
    ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
    Zconllz)edu.stanford.nlp.pipeline.StanfordCoreNLPz%stanford-corenlp-(\d+)(\.(\d+))+\.jarz,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarTc                    s0   t jdtdd t j|| |  jd7  _d S )Nz}The StanfordNeuralDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.rp   rq   z(-annotators tokenize,ssplit,pos,depparse)rt   ru   rv   rw   r,   r(   rx   r{   r   r   r,     s    z'StanfordNeuralDependencyParser.__init__Fc                 C   s   t ddS )z
        Currently unimplemented because the neural dependency parser (and
        the StanfordCoreNLP pipeline class) doesn't support passing in pre-
        tagged tokens.
        zxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.N)NotImplementedError)r*   rH   r   r   r   r   rO     s    z1StanfordNeuralDependencyParser.tagged_parse_sentsc                 C   s   t |ddS )NROOTr   r	   r~   r   r   r   r2     s    z)StanfordNeuralDependencyParser._make_tree)F)rk   rl   rm   rn   rE   rD   r!   r#   rd   r1   r,   rO   r2   r   r   r   r{   r   r     s   
r   )r   r]   rt   
subprocessr   Znltk.internalsr   r   r   r   r   Znltk.parse.apir   Znltk.parse.dependencygraphr
   Z	nltk.treer   r"   r   ro   r   r   r   r   r   r   <module>	   s    wCB