<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#!/usr/bin/env python
#
# Natural Language Toolkit: TGrep search
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Will Roberts &lt;wildwilhelm@gmail.com&gt;
# URL: &lt;https://www.nltk.org/&gt;
# For license information, see LICENSE.TXT

"""
Unit tests for nltk.tgrep.
"""


import unittest

from nltk import tgrep
from nltk.tree import ParentedTree


class TestSequenceFunctions(unittest.TestCase):

    """
    Class containing unit tests for nltk.tgrep.
    """

    def test_tokenize_simple(self):
        """
        Tokenize one compound pattern and compare the result with the
        expected token list.
        """
        pattern = "A .. (B !&lt; C . D) | ![&lt;&lt; (E , F) $ G]"
        expected = [
            # left-hand alternative
            "A", "..", "(", "B", "!", "&lt;", "C", ".", "D", ")",
            "|",
            # right-hand alternative
            "!", "[", "&lt;&lt;", "(", "E", ",", "F", ")", "$", "G", "]",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_tokenize_encoding(self):
        """
        Check that a bytes pattern and the same pattern given as str
        produce equal token lists.
        """
        tokens_from_bytes = tgrep.tgrep_tokenize(
            b"A .. (B !&lt; C . D) | ![&lt;&lt; (E , F) $ G]"
        )
        tokens_from_str = tgrep.tgrep_tokenize(
            "A .. (B !&lt; C . D) | ![&lt;&lt; (E , F) $ G]"
        )
        self.assertEqual(tokens_from_bytes, tokens_from_str)

    def test_tokenize_link_types(self):
        """
        Check that each basic link operator written between two node
        names comes out as one token, both on its own and when it is
        preceded by ``!``.
        """
        operators = (
            # parent / child relations, optionally with an index or edge marker
            "&lt;", "&gt;",
            "&lt;3", "&gt;3",
            "&lt;,", "&gt;,",
            "&lt;-3", "&gt;-3",
            "&lt;-", "&gt;-",
            "&lt;'", "&gt;'",
            "&lt;:", "&gt;:",
            # doubled forms
            "&lt;&lt;", "&gt;&gt;",
            "&lt;&lt;,", "&gt;&gt;,",
            "&lt;&lt;'", "&gt;&gt;'",
            "&lt;&lt;:", "&gt;&gt;:",
            # dot and comma operators
            ".", ",", "..", ",,",
            # dollar operators
            "$", "$.", "$,", "$..", "$,,",
        )
        # Plain form: the operator sits directly between the node names.
        for op in operators:
            self.assertEqual(tgrep.tgrep_tokenize(f"A{op}B"), ["A", op, "B"])
        # Form with "!": the "!" is expected as a separate token in front
        # of the operator.
        for op in operators:
            self.assertEqual(tgrep.tgrep_tokenize(f"A!{op}B"), ["A", "!", op, "B"])

    def test_tokenize_examples(self):
        """
        Check tokenization of the example patterns from the TGrep2
        manual.
        """
        cases = (
            ("NP &lt; PP", ["NP", "&lt;", "PP"]),
            ("/^NP/", ["/^NP/"]),
            ("NP &lt;&lt; PP . VP", ["NP", "&lt;&lt;", "PP", ".", "VP"]),
            ("NP &lt;&lt; PP | . VP", ["NP", "&lt;&lt;", "PP", "|", ".", "VP"]),
            (
                "NP !&lt;&lt; PP [&gt; NP | &gt;&gt; VP]",
                ["NP", "!", "&lt;&lt;", "PP", "[", "&gt;", "NP", "|", "&gt;&gt;", "VP", "]"],
            ),
            (
                "NP &lt;&lt; (PP . VP)",
                ["NP", "&lt;&lt;", "(", "PP", ".", "VP", ")"],
            ),
            (
                "NP &lt;' (PP &lt;, (IN &lt; on))",
                ["NP", "&lt;'", "(", "PP", "&lt;,", "(", "IN", "&lt;", "on", ")", ")"],
            ),
            (
                "S &lt; (A &lt; B) &lt; C",
                ["S", "&lt;", "(", "A", "&lt;", "B", ")", "&lt;", "C"],
            ),
            (
                "S &lt; ((A &lt; B) &lt; C)",
                ["S", "&lt;", "(", "(", "A", "&lt;", "B", ")", "&lt;", "C", ")"],
            ),
            (
                "S &lt; (A &lt; B &lt; C)",
                ["S", "&lt;", "(", "A", "&lt;", "B", "&lt;", "C", ")"],
            ),
            ("A&lt;B&amp;.C", ["A", "&lt;", "B", "&amp;", ".", "C"]),
        )
        for pattern, expected in cases:
            self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_tokenize_quoting(self):
        """
        Check that double-quoted node names stay whole tokens even when
        they contain operator characters or spaces.
        """
        pattern = '"A&lt;&lt;:B"&lt;&lt;:"A $.. B"&lt;"A&gt;3B"&lt;C'
        expected = [
            '"A&lt;&lt;:B"',
            "&lt;&lt;:",
            '"A $.. B"',
            "&lt;",
            '"A&gt;3B"',
            "&lt;",
            "C",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_tokenize_nodenames(self):
        """
        Check tokenization of the different ways a node can be named.
        """
        cases = (
            ("Robert", ["Robert"]),
            ("/^[Bb]ob/", ["/^[Bb]ob/"]),
            ("*", ["*"]),
            ("__", ["__"]),
            # NLTK tree position syntax: "N(" is expected as one token
            ("N()", ["N(", ")"]),
            ("N(0,)", ["N(", "0", ",", ")"]),
            ("N(0,0)", ["N(", "0", ",", "0", ")"]),
            ("N(0,0,)", ["N(", "0", ",", "0", ",", ")"]),
        )
        for pattern, expected in cases:
            self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_tokenize_macros(self):
        """
        Check tokenization of a search string that defines two macros
        and then uses them.
        """
        pattern = "@ NP /^NP/;\n@ NN /^NN/;\n@NP [!&lt; NP | &lt; @NN] !$.. @NN"
        expected = [
            # first macro definition
            "@", "NP", "/^NP/", ";",
            # second macro definition
            "@", "NN", "/^NN/", ";",
            # search pattern that refers to the macros
            "@NP", "[", "!", "&lt;", "NP", "|", "&lt;", "@NN", "]", "!", "$..", "@NN",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_node_simple(self):
        """
        Search one small tree by plain node name and check both the
        reported positions and the reported nodes.
        """
        tree = ParentedTree.fromstring(
            "(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))"
        )

        nn_positions = list(tgrep.tgrep_positions("NN", [tree]))
        self.assertEqual(nn_positions, [[(0, 2), (2, 1)]])

        # tgrep_nodes is expected to give the subtrees at those positions.
        nn_nodes = list(tgrep.tgrep_nodes("NN", [tree]))
        self.assertEqual(nn_nodes, [[tree[0, 2], tree[2, 1]]])

        either_positions = list(tgrep.tgrep_positions("NN|JJ", [tree]))
        self.assertEqual(either_positions, [[(0, 1), (0, 2), (2, 1)]])

    def test_node_printing(self):
        """Check that a leading ' (the tgrep print operator) does not change results."""
        tree = ParentedTree.fromstring("(S (n x) (N x))")
        for pattern in ("N", "/[Nn]/"):
            plain = list(tgrep.tgrep_positions(pattern, [tree]))
            marked = list(tgrep.tgrep_positions("'" + pattern, [tree]))
            self.assertEqual(plain, marked)

    def test_node_encoding(self):
        """
        Test that tgrep search strings handle bytes and strs the same
        way.

        Each assertion runs the same search twice, once with a bytes
        pattern and once with the equivalent str pattern, and requires
        both results to be equal.
        """
        tree = ParentedTree.fromstring(
            "(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))"
        )
        # The right-hand side must use the str pattern; comparing the bytes
        # search with itself would pass without checking anything.
        self.assertEqual(
            list(tgrep.tgrep_positions(b"NN", [tree])),
            list(tgrep.tgrep_positions("NN", [tree])),
        )
        self.assertEqual(
            list(tgrep.tgrep_nodes(b"NN", [tree])),
            list(tgrep.tgrep_nodes("NN", [tree])),
        )
        self.assertEqual(
            list(tgrep.tgrep_positions(b"NN|JJ", [tree])),
            list(tgrep.tgrep_positions("NN|JJ", [tree])),
        )

    def test_node_nocase(self):
        """
        Compare a quoted node name with the same name carrying the
        ``i@`` prefix for case-insensitive matching.
        """
        tree = ParentedTree.fromstring("(S (n x) (N x))")
        cases = (
            ('"N"', [[(1,)]]),
            ('i@"N"', [[(0,), (1,)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)

    def test_node_quoted(self):
        """
        Select nodes by quoted node names, including names that contain
        double quotes and backslashes.
        """
        tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))')
        cases = (
            ('"N"', [[()]]),
            ('"\\"N\\""', [[(0,)]]),
            ('"N\\""', [[(1,)]]),
            ('"\\"\\\\\\""', [[(2,)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)

    def test_node_regex(self):
        """
        Match node names against an anchored regular expression.
        """
        tree = ParentedTree.fromstring("(S (NP-SBJ x) (NP x) (NNP x) (VP x))")
        # The pattern selects nodes whose name starts with NP, so NP-SBJ
        # is expected alongside NP.
        found = list(tgrep.tgrep_positions("/^NP/", [tree]))
        self.assertEqual(found, [[(0,), (1,)]])

    def test_node_regex_2(self):
        """
        Compare an anchored and an unanchored regular expression on the
        same tree.
        """
        tree = ParentedTree.fromstring("(S (SBJ x) (SBJ1 x) (NP-SBJ x))")
        cases = (
            # anchored at the start of the node name
            ("/^SBJ/", [[(0,), (1,)]]),
            # SBJ anywhere in the node name, so NP-SBJ is included
            ("/SBJ/", [[(0,), (1,), (2,)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)

    def test_node_tree_position(self):
        """
        For every non-leaf position in the tree, search with the
        pattern ``N`` followed by that position tuple and require
        exactly that one position back.
        """
        tree = ParentedTree.fromstring("(S (NP-SBJ x) (NP x) (NNP x) (VP x))")
        leaf_count = len(tree.leaves())
        leaves = set(map(tree.leaf_treeposition, range(leaf_count)))
        # only positions that are not leaves are exercised
        inner_positions = [pos for pos in tree.treepositions() if pos not in leaves]
        for expected_position in inner_positions:
            pattern = "N" + str(expected_position)
            found = list(tgrep.tgrep_positions(pattern, [tree]))
            hits = found[0]
            self.assertEqual(len(hits), 1)
            self.assertEqual(hits[0], expected_position)

    def test_node_noleaves(self):
        """
        Search for a leaf label with the default arguments and again
        with ``False`` passed as the third positional argument
        (the search_leaves flag).
        """
        tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))")

        with_leaves = list(tgrep.tgrep_positions("x", [tree]))
        self.assertEqual(with_leaves, [[(0, 0, 0), (1, 0, 0)]])

        without_leaves = list(tgrep.tgrep_positions("x", [tree], False))
        self.assertEqual(without_leaves, [[]])

    def tests_rel_dominance(self):
        """
        Check the dominance operators against four small trees.

        Each tree is paired with a list of search patterns and the
        positions each pattern is expected to return.
        """

        def check(tree, cases):
            # Run every pattern against the one tree, in order.
            for pattern, expected in cases:
                self.assertEqual(
                    list(tgrep.tgrep_positions(pattern, [tree])), expected
                )

        check(
            ParentedTree.fromstring("(S (A (T x)) (B (N x)))"),
            [
                ("* &lt; T", [[(0,)]]),
                ("* &lt; T &gt; S", [[(0,)]]),
                ("* !&lt; T", [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]),
                ("* !&lt; T &gt; S", [[(1,)]]),
                ("* &gt; A", [[(0, 0)]]),
                ("* &gt; B", [[(1, 0)]]),
                ("* !&gt; B", [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]]),
                ("* !&gt; B &gt;&gt; S", [[(0,), (0, 0), (1,)]]),
                ("* &gt;&gt; S", [[(0,), (0, 0), (1,), (1, 0)]]),
                ("* &gt;&gt;, S", [[(0,), (0, 0)]]),
                ("* &gt;&gt;' S", [[(1,), (1, 0)]]),
                # Known issue: the pattern '* !&gt;&gt; S' with the expected
                # result [[()]] is left out of this list.
                ("* &lt;&lt; T", [[(), (0,)]]),
                ("* &lt;&lt;' T", [[(0,)]]),
                ("* &lt;&lt;1 N", [[(1,)]]),
                ("* !&lt;&lt; T", [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]),
            ],
        )

        check(
            ParentedTree.fromstring("(S (A (T x)) (B (T x) (N x )))"),
            [
                ("* &lt;: T", [[(0,)]]),
                ("* &lt; T", [[(0,), (1,)]]),
                (
                    "* !&lt;: T",
                    [
                        [
                            (),
                            (0, 0),
                            (0, 0, 0),
                            (1,),
                            (1, 0),
                            (1, 0, 0),
                            (1, 1),
                            (1, 1, 0),
                        ]
                    ],
                ),
                ("* !&lt;: T &gt; S", [[(1,)]]),
            ],
        )

        check(
            ParentedTree.fromstring("(S (T (A x) (B x)) (T (C x)))"),
            [
                ("* &gt;: T", [[(1, 0)]]),
                (
                    "* !&gt;: T",
                    [
                        [
                            (),
                            (0,),
                            (0, 0),
                            (0, 0, 0),
                            (0, 1),
                            (0, 1, 0),
                            (1,),
                            (1, 0, 0),
                        ]
                    ],
                ),
            ],
        )

        check(
            ParentedTree.fromstring(
                "(S (A (B (C (D (E (T x))))))" " (A (B (C (D (E (T x))) (N x)))))"
            ),
            [
                (
                    "* &lt;&lt;: T",
                    [
                        [
                            (0,),
                            (0, 0),
                            (0, 0, 0),
                            (0, 0, 0, 0),
                            (0, 0, 0, 0, 0),
                            (1, 0, 0, 0),
                            (1, 0, 0, 0, 0),
                        ]
                    ],
                ),
                (
                    "* &gt;&gt;: A",
                    [
                        [
                            (0, 0),
                            (0, 0, 0),
                            (0, 0, 0, 0),
                            (0, 0, 0, 0, 0),
                            (0, 0, 0, 0, 0, 0),
                            (1, 0),
                            (1, 0, 0),
                        ]
                    ],
                ),
            ],
        )

    def test_bad_operator(self):
        """
        Check that a search using an undefined tgrep operator raises
        ``tgrep.TgrepException`` when its results are consumed.
        """
        tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))")
        # The call itself is made outside the assertRaises scope; only
        # turning its result into a list is expected to raise.
        results = tgrep.tgrep_positions("* &gt;&gt;&gt; S", [tree])
        with self.assertRaises(tgrep.TgrepException):
            list(results)

    def test_comments(self):
        """
        Check that ``#`` comment lines inside a tgrep search string do
        not change the search result.
        """
        tree = ParentedTree.fromstring("(S (NN x) (NP x) (NN x))")
        expected = [[(0,), (2,)]]

        # Macro definitions followed by the search pattern, no comments.
        without_comments = """
        @ NP /^NP/;
        @ NN /^NN/;
        @NN
        """
        self.assertEqual(
            list(tgrep.tgrep_positions(without_comments, [tree])), expected
        )

        # The same search with comment lines and a blank line mixed in.
        with_comments = """
        # macros
        @ NP /^NP/;
        @ NN /^NN/;

        # search string
        @NN
        """
        self.assertEqual(list(tgrep.tgrep_positions(with_comments, [tree])), expected)

    def test_rel_sister_nodes(self):
        """
        Check the ``$`` family of sister operators on a tree with three
        sibling nodes.
        """
        tree = ParentedTree.fromstring("(S (A x) (B x) (C x))")
        cases = (
            ("* $. B", [[(0,)]]),
            ("* $.. B", [[(0,)]]),
            ("* $, B", [[(2,)]]),
            ("* $,, B", [[(2,)]]),
            ("* $ B", [[(0,), (2,)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)

    def tests_rel_indexed_children(self):
        """
        Check the operators that select a node by its index within its
        parent, first from the child side and then from the parent side.
        """
        # Child side: each pattern should return the single child of S
        # found at the listed index.
        tree = ParentedTree.fromstring("(S (A x) (B x) (C x))")
        child_side = (
            (",", 0),
            ("1", 0),
            ("2", 1),
            ("3", 2),
            ("'", 2),
            ("-1", 2),
            ("-2", 1),
            ("-3", 0),
        )
        for selector, index in child_side:
            found = list(tgrep.tgrep_positions(f"* &gt;{selector} S", [tree]))
            self.assertEqual(found, [[(index,)]])

        # Parent side: D, E and F each hold A at a different index, so
        # each pattern should return exactly one of them.
        tree = ParentedTree.fromstring(
            "(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))"
        )
        parent_side = (
            (",", 0),
            ("1", 0),
            ("2", 2),
            ("3", 1),
            ("'", 1),
            ("-1", 1),
            ("-2", 2),
            ("-3", 0),
        )
        for selector, index in parent_side:
            found = list(tgrep.tgrep_positions(f"* &lt;{selector} A", [tree]))
            self.assertEqual(found, [[(index,)]])

    def test_rel_precedence(self):
        """
        Check the precedence operators (dot and comma, single and
        doubled) against one tree.
        """
        tree = ParentedTree.fromstring(
            "(S (NP (NP (PP x)) (NP (AP x)))"
            " (VP (AP (X (PP x)) (Y (AP x))))"
            " (NP (RC (NP (AP x)))))"
        )
        cases = (
            ("* . X", [[(0,), (0, 1), (0, 1, 0)]]),
            ("* . Y", [[(1, 0, 0), (1, 0, 0, 0)]]),
            ("* .. X", [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]]),
            (
                "* .. Y",
                [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]],
            ),
            ("* , X", [[(1, 0, 1), (1, 0, 1, 0)]]),
            ("* , Y", [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]),
            (
                "* ,, X",
                [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
            ),
            ("* ,, Y", [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)

    def test_examples(self):
        """
        Run the Basic Examples from the TGrep2 manual.

        Each scenario is a tree in bracketed form plus the patterns to
        run against it and the positions each pattern should return.
        """
        scenarios = (
            (
                "(S (NP (AP x)) (NP (PP x)))",
                (
                    # This matches any NP node that immediately dominates a PP:
                    ("NP &lt; PP", [[(1,)]]),
                ),
            ),
            (
                "(S (NP x) (VP x) (NP (PP x)) (VP x))",
                (
                    # This matches an NP that dominates a PP and is
                    # immediately followed by a VP:
                    ("NP &lt;&lt; PP . VP", [[(2,)]]),
                ),
            ),
            (
                "(S (NP (AP x)) (NP (PP x)) " "(NP (DET x) (NN x)) (VP x))",
                (
                    # This matches an NP that dominates a PP or is
                    # immediately followed by a VP:
                    ("NP &lt;&lt; PP | . VP", [[(1,), (2,)]]),
                ),
            ),
            (
                "(S (NP (NP (PP x)) (NP (AP x)))"
                " (VP (AP (NP (PP x)) (NP (AP x))))"
                " (NP (RC (NP (AP x)))))",
                (
                    # This matches an NP that does not dominate a PP. Also,
                    # the NP must either have a parent that is an NP or be
                    # dominated by a VP:
                    ("NP !&lt;&lt; PP [&gt; NP | &gt;&gt; VP]", [[(0, 1), (1, 0, 1)]]),
                ),
            ),
            (
                "(S (NP (AP (PP x) (VP x))) " "(NP (AP (PP x) (NP x))) (NP x))",
                (
                    # This matches an NP that dominates a PP which itself is
                    # immediately followed by a VP. Note the use of
                    # parentheses to group ". VP" with the PP rather than
                    # with the NP:
                    ("NP &lt;&lt; (PP . VP)", [[(0,)]]),
                ),
            ),
            (
                "(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))"
                " (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))"
                " (NP x))",
                (
                    # This matches an NP whose last child is a PP that
                    # begins with the preposition "on":
                    ("NP &lt;' (PP &lt;, (IN &lt; on))", [[(0,)]]),
                ),
            ),
            (
                "(S (S (C x) (A (B x))) (S (C x) (A x)) " "(S (D x) (A (B x))))",
                (
                    # The following pattern matches an S which has a child A
                    # and another child that is a C and that the A has a
                    # child B:
                    ("S &lt; (A &lt; B) &lt; C", [[(0,)]]),
                ),
            ),
            (
                "(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))",
                (
                    # However, this pattern means that S has child A and
                    # that A has children B and C:
                    ("S &lt; ((A &lt; B) &lt; C)", [[(0,)]]),
                    # It is equivalent to this:
                    ("S &lt; (A &lt; B &lt; C)", [[(0,)]]),
                ),
            ),
        )
        for bracketed, checks in scenarios:
            tree = ParentedTree.fromstring(bracketed)
            for pattern, expected in checks:
                self.assertEqual(
                    list(tgrep.tgrep_positions(pattern, [tree])), expected
                )

    def test_use_macros(self):
        """
        Define two tgrep2 macros and use them in a search, then check
        that referring to a macro that was never defined raises
        ``tgrep.TgrepException``.
        """
        tree = ParentedTree.fromstring(
            "(VP (VB sold) (NP (DET the) "
            "(NN heiress)) (NP (NN deed) (PREP to) "
            "(NP (DET the) (NN school) (NN house))))"
        )

        defined_only = "@ NP /^NP/;\n@ NN /^NN/;\n@NP !&lt; @NP !$.. @NN"
        found = list(tgrep.tgrep_positions(defined_only, [tree]))
        self.assertEqual(found, [[(1,), (2, 2)]])

        # use undefined macro @CNP
        uses_undefined = "@ NP /^NP/;\n@ NN /^NN/;\n@CNP !&lt; @NP !$.. @NN"
        # The call is made outside the assertRaises scope; only consuming
        # its result is expected to raise.
        results = tgrep.tgrep_positions(uses_undefined, [tree])
        with self.assertRaises(tgrep.TgrepException):
            list(results)

    def test_tokenize_node_labels(self):
        """Check tokenization of a pattern without and with ``=label`` suffixes."""
        unlabeled = "S &lt; @SBJ &lt; (@VP &lt; (@VB $.. @OBJ))"
        unlabeled_tokens = [
            "S", "&lt;", "@SBJ",
            "&lt;", "(", "@VP",
            "&lt;", "(", "@VB", "$..", "@OBJ", ")", ")",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(unlabeled), unlabeled_tokens)

        # With labels, "=" and the label name are expected as two extra
        # tokens after the labeled node.
        labeled = "S &lt; @SBJ=s &lt; (@VP=v &lt; (@VB $.. @OBJ))"
        labeled_tokens = [
            "S", "&lt;", "@SBJ", "=", "s",
            "&lt;", "(", "@VP", "=", "v",
            "&lt;", "(", "@VB", "$..", "@OBJ", ")", ")",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(labeled), labeled_tokens)

    def test_tokenize_segmented_patterns(self):
        """Check tokenization of a pattern with a second segment after ``:``."""
        pattern = "S &lt; @SBJ=s &lt; (@VP=v &lt; (@VB $.. @OBJ)) : =s .. =v"
        expected = [
            # first segment
            "S", "&lt;", "@SBJ", "=", "s",
            "&lt;", "(", "@VP", "=", "v",
            "&lt;", "(", "@VB", "$..", "@OBJ", ")", ")",
            # segment separator
            ":",
            # second segment: here "=s" and "=v" are single tokens
            "=s", "..", "=v",
        ]
        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)

    def test_labeled_nodes(self):
        """
        Check labeled nodes on two sentences that differ in where the
        subject sits.

        Test case from Emily M. Bender.
        """
        labeled = """
            # macros
            @ SBJ /SBJ/;
            @ VP /VP/;
            @ VB /VB/;
            @ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
            S &lt; @SBJ=s &lt; (@VP=v &lt; (@VB $.. @OBJ)) : =s .. =v"""
        # Keep only the text before the first blank line (the macro
        # section) and append the same pattern without labels or the
        # second segment.
        macro_section = labeled.split("\n\n")[0]
        unlabeled = macro_section + "S &lt; @SBJ &lt; (@VP &lt; (@VB $.. @OBJ))"
        # A pattern without macros or labels that is expected to give the
        # same results as the labeled search.
        rewritten = "S &lt; (/.*SBJ/ $.. (/VP/ &lt; (/VB/ $.. /.*OBJ/)))"

        subject_first = ParentedTree.fromstring(
            "(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))"
        )
        subject_last = ParentedTree.fromstring(
            "(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))"
        )

        def found(pattern, sentence):
            # Positions for a single sentence, as a one-element list.
            return list(tgrep.tgrep_positions(pattern, [sentence]))

        # Subject before the verb phrase: all three searches find something.
        self.assertTrue(found(unlabeled, subject_first)[0])
        self.assertTrue(found(labeled, subject_first)[0])
        self.assertTrue(found(rewritten, subject_first)[0])
        self.assertEqual(
            found(labeled, subject_first), found(rewritten, subject_first)
        )

        # Subject after the verb phrase: only the unlabeled search still
        # finds something.
        self.assertTrue(found(unlabeled, subject_last)[0])
        self.assertFalse(found(labeled, subject_last)[0])
        self.assertFalse(found(rewritten, subject_last)[0])
        self.assertEqual(found(labeled, subject_last), found(rewritten, subject_last))

    def test_multiple_conjs(self):
        """
        Check a node followed by three relations, and the same node
        followed by only the first two.
        """
        sent = ParentedTree.fromstring("((A (B b) (C c)) (A (B b) (C c) (D d)))")
        cases = (
            # three relations: only the second A has a D child
            ("(A &lt; B &lt; C &lt; D)", [[(1,)]]),
            # two relations: both A nodes qualify
            ("(A &lt; B &lt; C)", [[(0,), (1,)]]),
        )
        for pattern, expected in cases:
            self.assertEqual(list(tgrep.tgrep_positions(pattern, [sent])), expected)

    def test_trailing_semicolon(self):
        """
        Check that a search string gives the same result with zero, one
        or two semicolons appended to it.
        """
        tree = ParentedTree.fromstring(
            "(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))"
        )
        for pattern in ("NN", "NN;", "NN;;"):
            found = list(tgrep.tgrep_positions(pattern, [tree]))
            self.assertEqual(found, [[(0, 2), (2, 1)]])
</pre></body></html>