PCQRSCANER/venv/Lib/site-packages/nltk/tag/brill.py

# -*- coding: utf-8 -*-
# Natural Language Toolkit: Transformation-based learning
#
# Copyright (C) 2001-2019 NLTK Project
# Author: Marcus Uneson <marcus.uneson@gmail.com>
#   based on previous (nltk2) version by
#   Christopher Maloof, Edward Loper, Steven Bird
# URL: <http://nltk.org/>
# For license information, see  LICENSE.TXT

from __future__ import print_function, division

from collections import defaultdict, Counter

from nltk.tag import TaggerI
from nltk.tbl import Feature, Template
from nltk import jsontags


######################################################################
# Brill Templates
######################################################################


@jsontags.register_tag
class Word(Feature):
    """
    Feature that looks at the text (word) of tokens near the target token.
    """

    json_tag = 'nltk.tag.brill.Word'

    @staticmethod
    def extract_property(tokens, index):
        """
        Return the text of one token.

        :param tokens: sequence of tokens; element 0 of each token is read
        :param index: position in ``tokens`` of the token to inspect
        :return: the first element (the word) of ``tokens[index]``
        """
        token = tokens[index]
        return token[0]


@jsontags.register_tag
class Pos(Feature):
    """
    Feature that looks at the tag of tokens near the target token.
    """

    json_tag = 'nltk.tag.brill.Pos'

    @staticmethod
    def extract_property(tokens, index):
        """
        Return the tag of one token.

        :param tokens: sequence of tokens; element 1 of each token is read
        :param index: position in ``tokens`` of the token to inspect
        :return: the second element (the tag) of ``tokens[index]``
        """
        token = tokens[index]
        return token[1]


def nltkdemo18():
    """
    Build the 18 templates of the original nltk demo (multi-feature syntax).

    The set consists of the same nine position patterns applied first to
    ``Pos`` and then to ``Word``.

    :return: list of 18 templates, all Pos-based ones before all Word-based ones
    """
    templates = []
    # Pos first, then Word: keeps the construction order of the flat listing.
    for feature in (Pos, Word):
        templates += [
            Template(feature([-1])),
            Template(feature([1])),
            Template(feature([-2])),
            Template(feature([2])),
            Template(feature([-2, -1])),
            Template(feature([1, 2])),
            Template(feature([-3, -2, -1])),
            Template(feature([1, 2, 3])),
            Template(feature([-1]), feature([1])),
        ]
    return templates


def nltkdemo18plus():
    """
    Return the 18 templates of ``nltkdemo18`` followed by five additional
    multi-feature templates mixing Word and Pos (the motivation is easy
    comparison with nltkdemo18).

    :return: list of 23 templates, the nltkdemo18 ones first
    """
    # The base set is built before the extra templates, as in a single
    # ``nltkdemo18() + [...]`` expression.
    base_templates = nltkdemo18()
    extra_templates = [
        Template(Word([-1]), Pos([1])),
        Template(Pos([-1]), Word([1])),
        Template(Word([-1]), Word([0]), Pos([1])),
        Template(Pos([-1]), Word([0]), Word([1])),
        Template(Pos([-1]), Word([0]), Pos([1])),
    ]
    return base_templates + extra_templates


def fntbl37():
    """
    Return 37 templates taken from the postagging task of the
    fntbl distribution http://www.cs.jhu.edu/~rflorian/fntbl/
    (37 is after excluding a handful which do not condition on Pos[0];
    fntbl can do that but the current nltk implementation cannot.)

    :return: list of 37 templates, in the order listed below
    """
    return [
        # Word-only conditions involving the target token and/or its neighbours
        Template(Word([0]), Word([1]), Word([2])),
        Template(Word([-1]), Word([0]), Word([1])),
        Template(Word([0]), Word([-1])),
        Template(Word([0]), Word([1])),
        Template(Word([0]), Word([2])),
        Template(Word([0]), Word([-2])),
        Template(Word([1, 2])),
        Template(Word([-2, -1])),
        Template(Word([1, 2, 3])),
        Template(Word([-3, -2, -1])),
        # Word of the target token combined with one nearby tag
        Template(Word([0]), Pos([2])),
        Template(Word([0]), Pos([-2])),
        Template(Word([0]), Pos([1])),
        Template(Word([0]), Pos([-1])),
        # Single-word conditions
        Template(Word([0])),
        Template(Word([-2])),
        Template(Word([2])),
        Template(Word([1])),
        Template(Word([-1])),
        # Tag-only conditions
        Template(Pos([-1]), Pos([1])),
        Template(Pos([1]), Pos([2])),
        Template(Pos([-1]), Pos([-2])),
        Template(Pos([1])),
        Template(Pos([-1])),
        Template(Pos([-2])),
        Template(Pos([2])),
        Template(Pos([1, 2, 3])),
        Template(Pos([1, 2])),
        Template(Pos([-3, -2, -1])),
        Template(Pos([-2, -1])),
        # Mixed tag/word conditions
        Template(Pos([1]), Word([0]), Word([1])),
        Template(Pos([1]), Word([0]), Word([-1])),
        Template(Pos([-1]), Word([-1]), Word([0])),
        Template(Pos([-1]), Word([0]), Word([1])),
        # NOTE(review): the next entry uses the same two features as
        # Template(Pos([-1]), Pos([-2])) above, in the opposite order --
        # confirm whether Template treats these as distinct.
        Template(Pos([-2]), Pos([-1])),
        # NOTE(review): exact duplicate of Template(Pos([1]), Pos([2])) listed
        # among the tag-only conditions above; it is one of the 37 entries.
        # Confirm whether the repetition is intentional before removing it.
        Template(Pos([1]), Pos([2])),
        Template(Pos([1]), Pos([2]), Word([1])),
    ]


def brill24():
    """
    Build the 24 templates of the seminal TBL paper, Brill (1995).

    :return: list of 24 templates: tag-only ones first, then word-only ones,
        then those combining a word with a tag
    """
    # Conditions on surrounding tags only.
    tag_only = [
        Template(Pos([-1])),
        Template(Pos([1])),
        Template(Pos([-2])),
        Template(Pos([2])),
        Template(Pos([-2, -1])),
        Template(Pos([1, 2])),
        Template(Pos([-3, -2, -1])),
        Template(Pos([1, 2, 3])),
        Template(Pos([-1]), Pos([1])),
        Template(Pos([-2]), Pos([-1])),
        Template(Pos([1]), Pos([2])),
    ]
    # Conditions on words only (surrounding words and the target word itself).
    word_only = [
        Template(Word([-1])),
        Template(Word([1])),
        Template(Word([-2])),
        Template(Word([2])),
        Template(Word([-2, -1])),
        Template(Word([1, 2])),
        Template(Word([-1, 0])),
        Template(Word([0, 1])),
        Template(Word([0])),
    ]
    # Conditions combining words with the tag of a neighbouring token.
    word_and_tag = [
        Template(Word([-1]), Pos([-1])),
        Template(Word([1]), Pos([1])),
        Template(Word([0]), Word([-1]), Pos([-1])),
        Template(Word([0]), Word([1]), Pos([1])),
    ]
    return tag_only + word_only + word_and_tag


def describe_template_sets():
    """
    Print the available template sets in this demo, with a short description.

    Every function found in this module, except this one, is printed by
    name together with its docstring.

    :return: None
    """
    import inspect
    import sys

    # Look the module up by name so that all of its functions can be listed.
    this_module = sys.modules[__name__]
    for (func_name, func) in inspect.getmembers(this_module, inspect.isfunction):
        # This function is not a template set, so it does not list itself.
        if func_name != "describe_template_sets":
            print(func_name, func.__doc__, "\n")


######################################################################
# The Brill Tagger
######################################################################


@jsontags.register_tag
class BrillTagger(TaggerI):
    """
    Brill's transformational rule-based tagger.  Brill taggers use an
    initial tagger (such as ``tag.DefaultTagger``) to assign an initial
    tag sequence to a text; and then apply an ordered list of
    transformational rules to correct the tags of individual tokens.
    These transformation rules are specified by the ``TagRule``
    interface.

    Brill taggers can be created directly, from an initial tagger and
    a list of transformational rules; but more often, Brill taggers
    are created by learning rules from a training corpus, using one
    of the TaggerTrainers available.
    """

    json_tag = 'nltk.tag.BrillTagger'

    def __init__(self, initial_tagger, rules, training_stats=None):
        """
        :param initial_tagger: The initial tagger
        :type initial_tagger: TaggerI

        :param rules: An ordered list of transformation rules that
            should be used to correct the initial tagging.
        :type rules: list(TagRule)

        :param training_stats: A dictionary of statistics collected
            during training, for possible later use
        :type training_stats: dict

        """
        self._initial_tagger = initial_tagger
        # Stored as a tuple so the rule order cannot be changed afterwards.
        self._rules = tuple(rules)
        self._training_stats = training_stats

    def encode_json_obj(self):
        """
        Return the state needed to rebuild this tagger.

        :return: tuple of (initial tagger, rules, training statistics)
        """
        return self._initial_tagger, self._rules, self._training_stats

    @classmethod
    def decode_json_obj(cls, obj):
        """
        Rebuild a tagger from the triple produced by ``encode_json_obj``.

        :param obj: sequence of (initial tagger, rules, training statistics)
        :return: a new instance of ``cls``
        """
        _initial_tagger, _rules, _training_stats = obj
        return cls(_initial_tagger, _rules, _training_stats)

    def rules(self):
        """
        Return the ordered transformation rules that this tagger has learnt

        :return: the ordered transformation rules that correct the initial tagging
        :rtype: tuple of Rules
        """
        return self._rules

    def train_stats(self, statistic=None):
        """
        Return a named statistic collected during training, or a dictionary of all
        available statistics if no name given

        :param statistic: name of statistic
        :type statistic: str
        :return: some statistic collected during training of this tagger; None
            if the statistic is unknown or if the tagger was created without
            training statistics
        :rtype: any (but usually a number)
        """
        if statistic is None:
            return self._training_stats
        # A tagger built directly (training_stats=None) has no statistics to
        # look up; report "not available" instead of failing on None.get().
        if self._training_stats is None:
            return None
        return self._training_stats.get(statistic)

    def tag(self, tokens):
        # Inherit documentation from TaggerI

        # Run the initial tagger.
        tagged_tokens = self._initial_tagger.tag(tokens)

        # Create a dictionary that maps each tag to the set of
        # indices of tokens that have that tag.
        tag_to_positions = defaultdict(set)
        for i, (token, tag) in enumerate(tagged_tokens):
            tag_to_positions[tag].add(i)

        # Apply each rule, in order.  Only try to apply rules at
        # positions that have the desired original tag.
        for rule in self._rules:
            # Find the positions where it might apply
            positions = tag_to_positions.get(rule.original_tag, [])
            # Apply the rule at those positions.
            changed = rule.apply(tagged_tokens, positions)
            # Update tag_to_positions with the positions of tags that
            # were modified.
            for i in changed:
                tag_to_positions[rule.original_tag].remove(i)
                tag_to_positions[rule.replacement_tag].add(i)

        return tagged_tokens

    def print_template_statistics(self, test_stats=None, printunused=True):
        """
        Print a list of all templates, ranked according to efficiency.

        If test_stats is available, the templates are ranked according to their
        relative contribution (summed for all rules created from a given template,
        weighted by score) to the performance on the test set. If no test_stats, then
        statistics collected during training are used instead. There is also
        an unweighted measure (just counting the rules). This is less informative,
        though, as many low-score rules will appear towards end of training.

        Relative scores are reported as 0.0 when the scores they are
        relative to sum to zero.

        :param test_stats: dictionary of statistics collected during testing
        :type test_stats: dict of str -> any (but usually numbers)
        :param printunused: if True, print a list of all unused templates
        :type printunused: bool
        :return: None
        :rtype: None
        """
        tids = [r.templateid for r in self._rules]
        train_stats = self.train_stats()

        trainscores = train_stats['rulescores']
        assert len(trainscores) == len(tids), (
            "corrupt statistics: "
            "{0} train scores for {1} rules".format(trainscores, tids)
        )
        template_counts = Counter(tids)
        weighted_traincounts = Counter()
        for (tid, score) in zip(tids, trainscores):
            weighted_traincounts[tid] += score
        tottrainscores = sum(trainscores)

        def share(part, total):
            # Rule scores can sum to zero (e.g. no net change on a test
            # set); report a zero share rather than dividing by zero.
            return part / total if total else 0.0

        # det_tplsort() is for deterministic sorting;
        # the otherwise convenient Counter.most_common() unfortunately
        # does not break ties deterministically
        # between python versions and will break cross-version tests
        def det_tplsort(tpl_value):
            return (tpl_value[1], repr(tpl_value[0]))

        def print_train_stats():
            # One row per template, ranked by summed training score.
            print(
                "TEMPLATE STATISTICS (TRAIN)  {0} templates, {1} rules)".format(
                    len(template_counts), len(tids)
                )
            )
            print(
                "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)
            )
            head = "#ID | Score (train) |  #Rules     | Template"
            print(head, "\n", "-" * len(head), sep="")
            train_tplscores = sorted(
                weighted_traincounts.items(), key=det_tplsort, reverse=True
            )
            for (tid, trainscore) in train_tplscores:
                s = "{0} | {1:5d}   {2:5.3f} |{3:4d}   {4:.3f} | {5}".format(
                    tid,
                    trainscore,
                    share(trainscore, tottrainscores),
                    template_counts[tid],
                    share(template_counts[tid], len(tids)),
                    Template.ALLTEMPLATES[int(tid)],
                )
                print(s)

        def print_testtrain_stats():
            # One row per template, ranked by summed test score, with the
            # training score shown alongside for comparison.
            testscores = test_stats['rulescores']
            print(
                "TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
                    len(template_counts), len(tids)
                )
            )
            print(
                "TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats)
            )
            print(
                "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)
            )
            weighted_testcounts = Counter()
            for (tid, score) in zip(tids, testscores):
                weighted_testcounts[tid] += score
            tottestscores = sum(testscores)
            head = "#ID | Score (test) | Score (train) |  #Rules     | Template"
            print(head, "\n", "-" * len(head), sep="")
            test_tplscores = sorted(
                weighted_testcounts.items(), key=det_tplsort, reverse=True
            )
            for (tid, testscore) in test_tplscores:
                s = "{0:s} |{1:5d}  {2:6.3f} |  {3:4d}   {4:.3f} |{5:4d}   {6:.3f} | {7:s}".format(
                    tid,
                    testscore,
                    share(testscore, tottestscores),
                    weighted_traincounts[tid],
                    share(weighted_traincounts[tid], tottrainscores),
                    template_counts[tid],
                    share(template_counts[tid], len(tids)),
                    # "{:s}" needs an actual string: formatting an arbitrary
                    # object with a non-empty format spec raises TypeError
                    # on Python 3, so convert explicitly (as done for the
                    # unused-template listing below).
                    str(Template.ALLTEMPLATES[int(tid)]),
                )
                print(s)

        def print_unused_templates():
            # Templates in Template.ALLTEMPLATES that produced no rule.
            usedtpls = set(int(tid) for tid in tids)
            unused = [
                (tid, tpl)
                for (tid, tpl) in enumerate(Template.ALLTEMPLATES)
                if tid not in usedtpls
            ]
            print("UNUSED TEMPLATES ({0})".format(len(unused)))

            for (tid, tpl) in unused:
                print("{0:03d} {1:s}".format(tid, str(tpl)))

        if test_stats is None:
            print_train_stats()
        else:
            print_testtrain_stats()
        print()
        if printunused:
            print_unused_templates()
        print()

    def batch_tag_incremental(self, sequences, gold):
        """
        Tags by applying each rule to the entire corpus (rather than all rules to a
        single sequence). The point is to collect statistics on the test set for
        individual rules.

        NOTE: This is inefficient (does not build any index, so will traverse the entire
        corpus N times for N rules) -- usually you would not care about statistics for
        individual rules and thus use batch_tag() instead

        :param sequences: lists of token sequences (sentences, in some applications) to be tagged
        :type sequences: list of list of strings
        :param gold: the gold standard; element 1 of each gold token is
            compared with the tag assigned to the corresponding token
        :type gold: list of list of tagged tokens
        :returns: tuple of (tagged_sequences, testing_stats), where testing_stats
            is a dict with the keys 'tokencount', 'sequencecount',
            'initialerrors', 'initialacc', 'rulescores' (one score per rule,
            in rule order), 'finalerrors' and 'finalacc'
        :raises ZeroDivisionError: if ``sequences`` contains no tokens
        """

        def counterrors(xs):
            # Number of tokens whose assigned tag differs from the gold tag.
            return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair))

        testing_stats = {}
        testing_stats['tokencount'] = sum(len(t) for t in sequences)
        testing_stats['sequencecount'] = len(sequences)
        tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences]
        testing_stats['initialerrors'] = counterrors(tagged_tokenses)
        testing_stats['initialacc'] = (
            1 - testing_stats['initialerrors'] / testing_stats['tokencount']
        )
        # Apply each rule to the entire corpus, in order
        errors = [testing_stats['initialerrors']]
        for rule in self._rules:
            for tagged_tokens in tagged_tokenses:
                rule.apply(tagged_tokens)
            errors.append(counterrors(tagged_tokenses))
        # A rule's score is the drop in error count it caused (negative if
        # the rule introduced more errors than it fixed).
        testing_stats['rulescores'] = [
            err0 - err1 for (err0, err1) in zip(errors, errors[1:])
        ]
        testing_stats['finalerrors'] = errors[-1]
        testing_stats['finalacc'] = (
            1 - testing_stats['finalerrors'] / testing_stats['tokencount']
        )
        return (tagged_tokenses, testing_stats)
3 2019-12-22 21:51:47 +01:00			`# -- coding: utf-8 --`
			`# Natural Language Toolkit: Transformation-based learning`
			`#`
			`# Copyright (C) 2001-2019 NLTK Project`
			`# Author: Marcus Uneson <marcus.uneson@gmail.com>`
			`# based on previous (nltk2) version by`
			`# Christopher Maloof, Edward Loper, Steven Bird`
			`# URL: <http://nltk.org/>`
			`# For license information, see LICENSE.TXT`

			`from __future__ import print_function, division`

			`from collections import defaultdict, Counter`

			`from nltk.tag import TaggerI`
			`from nltk.tbl import Feature, Template`
			`from nltk import jsontags`


			`######################################################################`
			`# Brill Templates`
			`######################################################################`


			`@jsontags.register_tag`
			`class Word(Feature):`
			`"""`
			`Feature which examines the text (word) of nearby tokens.`
			`"""`

			`json_tag = 'nltk.tag.brill.Word'`

			`@staticmethod`
			`def extract_property(tokens, index):`
			`"""@return: The given token's text."""`
			`return tokens[index][0]`


			`@jsontags.register_tag`
			`class Pos(Feature):`
			`"""`
			`Feature which examines the tags of nearby tokens.`
			`"""`

			`json_tag = 'nltk.tag.brill.Pos'`

			`@staticmethod`
			`def extract_property(tokens, index):`
			`"""@return: The given token's tag."""`
			`return tokens[index][1]`


			`def nltkdemo18():`
			`"""`
			`Return 18 templates, from the original nltk demo, in multi-feature syntax`
			`"""`
			`return [`
			`Template(Pos([-1])),`
			`Template(Pos([1])),`
			`Template(Pos([-2])),`
			`Template(Pos([2])),`
			`Template(Pos([-2, -1])),`
			`Template(Pos([1, 2])),`
			`Template(Pos([-3, -2, -1])),`
			`Template(Pos([1, 2, 3])),`
			`Template(Pos([-1]), Pos([1])),`
			`Template(Word([-1])),`
			`Template(Word([1])),`
			`Template(Word([-2])),`
			`Template(Word([2])),`
			`Template(Word([-2, -1])),`
			`Template(Word([1, 2])),`
			`Template(Word([-3, -2, -1])),`
			`Template(Word([1, 2, 3])),`
			`Template(Word([-1]), Word([1])),`
			`]`


			`def nltkdemo18plus():`
			`"""`
			`Return 18 templates, from the original nltk demo, and additionally a few`
			`multi-feature ones (the motivation is easy comparison with nltkdemo18)`
			`"""`
			`return nltkdemo18() + [`
			`Template(Word([-1]), Pos([1])),`
			`Template(Pos([-1]), Word([1])),`
			`Template(Word([-1]), Word([0]), Pos([1])),`
			`Template(Pos([-1]), Word([0]), Word([1])),`
			`Template(Pos([-1]), Word([0]), Pos([1])),`
			`]`


			`def fntbl37():`
			`"""`
			`Return 37 templates taken from the postagging task of the`
			`fntbl distribution http://www.cs.jhu.edu/~rflorian/fntbl/`
			`(37 is after excluding a handful which do not condition on Pos[0];`
			`fntbl can do that but the current nltk implementation cannot.)`
			`"""`
			`return [`
			`Template(Word([0]), Word([1]), Word([2])),`
			`Template(Word([-1]), Word([0]), Word([1])),`
			`Template(Word([0]), Word([-1])),`
			`Template(Word([0]), Word([1])),`
			`Template(Word([0]), Word([2])),`
			`Template(Word([0]), Word([-2])),`
			`Template(Word([1, 2])),`
			`Template(Word([-2, -1])),`
			`Template(Word([1, 2, 3])),`
			`Template(Word([-3, -2, -1])),`
			`Template(Word([0]), Pos([2])),`
			`Template(Word([0]), Pos([-2])),`
			`Template(Word([0]), Pos([1])),`
			`Template(Word([0]), Pos([-1])),`
			`Template(Word([0])),`
			`Template(Word([-2])),`
			`Template(Word([2])),`
			`Template(Word([1])),`
			`Template(Word([-1])),`
			`Template(Pos([-1]), Pos([1])),`
			`Template(Pos([1]), Pos([2])),`
			`Template(Pos([-1]), Pos([-2])),`
			`Template(Pos([1])),`
			`Template(Pos([-1])),`
			`Template(Pos([-2])),`
			`Template(Pos([2])),`
			`Template(Pos([1, 2, 3])),`
			`Template(Pos([1, 2])),`
			`Template(Pos([-3, -2, -1])),`
			`Template(Pos([-2, -1])),`
			`Template(Pos([1]), Word([0]), Word([1])),`
			`Template(Pos([1]), Word([0]), Word([-1])),`
			`Template(Pos([-1]), Word([-1]), Word([0])),`
			`Template(Pos([-1]), Word([0]), Word([1])),`
			`Template(Pos([-2]), Pos([-1])),`
			`Template(Pos([1]), Pos([2])),`
			`Template(Pos([1]), Pos([2]), Word([1])),`
			`]`


			`def brill24():`
			`"""`
			`Return 24 templates of the seminal TBL paper, Brill (1995)`
			`"""`
			`return [`
			`Template(Pos([-1])),`
			`Template(Pos([1])),`
			`Template(Pos([-2])),`
			`Template(Pos([2])),`
			`Template(Pos([-2, -1])),`
			`Template(Pos([1, 2])),`
			`Template(Pos([-3, -2, -1])),`
			`Template(Pos([1, 2, 3])),`
			`Template(Pos([-1]), Pos([1])),`
			`Template(Pos([-2]), Pos([-1])),`
			`Template(Pos([1]), Pos([2])),`
			`Template(Word([-1])),`
			`Template(Word([1])),`
			`Template(Word([-2])),`
			`Template(Word([2])),`
			`Template(Word([-2, -1])),`
			`Template(Word([1, 2])),`
			`Template(Word([-1, 0])),`
			`Template(Word([0, 1])),`
			`Template(Word([0])),`
			`Template(Word([-1]), Pos([-1])),`
			`Template(Word([1]), Pos([1])),`
			`Template(Word([0]), Word([-1]), Pos([-1])),`
			`Template(Word([0]), Word([1]), Pos([1])),`
			`]`


			`def describe_template_sets():`
			`"""`
			`Print the available template sets in this demo, with a short description"`
			`"""`
			`import inspect`
			`import sys`

			`# a bit of magic to get all functions in this module`
			`templatesets = inspect.getmembers(sys.modules[__name__], inspect.isfunction)`
			`for (name, obj) in templatesets:`
			`if name == "describe_template_sets":`
			`continue`
			`print(name, obj.__doc__, "\n")`


			`######################################################################`
			`# The Brill Tagger`
			`######################################################################`


			`@jsontags.register_tag`
			`class BrillTagger(TaggerI):`
			`"""`
			`Brill's transformational rule-based tagger. Brill taggers use an`
			initial tagger (such as ``tag.DefaultTagger``) to assign an initial
			`tag sequence to a text; and then apply an ordered list of`
			`transformational rules to correct the tags of individual tokens.`
			These transformation rules are specified by the ``TagRule``
			`interface.`

			`Brill taggers can be created directly, from an initial tagger and`
			`a list of transformational rules; but more often, Brill taggers`
			`are created by learning rules from a training corpus, using one`
			`of the TaggerTrainers available.`
			`"""`

			`json_tag = 'nltk.tag.BrillTagger'`

			`def __init__(self, initial_tagger, rules, training_stats=None):`
			`"""`
			`:param initial_tagger: The initial tagger`
			`:type initial_tagger: TaggerI`

			`:param rules: An ordered list of transformation rules that`
			`should be used to correct the initial tagging.`
			`:type rules: list(TagRule)`

			`:param training_stats: A dictionary of statistics collected`
			`during training, for possible later use`
			`:type training_stats: dict`

			`"""`
			`self._initial_tagger = initial_tagger`
			`self._rules = tuple(rules)`
			`self._training_stats = training_stats`

			`def encode_json_obj(self):`
			`return self._initial_tagger, self._rules, self._training_stats`

			`@classmethod`
			`def decode_json_obj(cls, obj):`
			`_initial_tagger, _rules, _training_stats = obj`
			`return cls(_initial_tagger, _rules, _training_stats)`

			`def rules(self):`
			`"""`
			`Return the ordered list of transformation rules that this tagger has learnt`

			`:return: the ordered list of transformation rules that correct the initial tagging`
			`:rtype: list of Rules`
			`"""`
			`return self._rules`

			`def train_stats(self, statistic=None):`
			`"""`
			`Return a named statistic collected during training, or a dictionary of all`
			`available statistics if no name given`

			`:param statistic: name of statistic`
			`:type statistic: str`
			`:return: some statistic collected during training of this tagger`
			`:rtype: any (but usually a number)`
			`"""`
			`if statistic is None:`
			`return self._training_stats`
			`else:`
			`return self._training_stats.get(statistic)`

			`def tag(self, tokens):`
			`# Inherit documentation from TaggerI`

			`# Run the initial tagger.`
			`tagged_tokens = self._initial_tagger.tag(tokens)`

			`# Create a dictionary that maps each tag to a list of the`
			`# indices of tokens that have that tag.`
			`tag_to_positions = defaultdict(set)`
			`for i, (token, tag) in enumerate(tagged_tokens):`
			`tag_to_positions[tag].add(i)`

			`# Apply each rule, in order. Only try to apply rules at`
			`# positions that have the desired original tag.`
			`for rule in self._rules:`
			`# Find the positions where it might apply`
			`positions = tag_to_positions.get(rule.original_tag, [])`
			`# Apply the rule at those positions.`
			`changed = rule.apply(tagged_tokens, positions)`
			`# Update tag_to_positions with the positions of tags that`
			`# were modified.`
			`for i in changed:`
			`tag_to_positions[rule.original_tag].remove(i)`
			`tag_to_positions[rule.replacement_tag].add(i)`

			`return tagged_tokens`

			`def print_template_statistics(self, test_stats=None, printunused=True):`
			`"""`
			`Print a list of all templates, ranked according to efficiency.`

			`If test_stats is available, the templates are ranked according to their`
			`relative contribution (summed for all rules created from a given template,`
			`weighted by score) to the performance on the test set. If no test_stats, then`
			`statistics collected during training are used instead. There is also`
			`an unweighted measure (just counting the rules). This is less informative,`
			`though, as many low-score rules will appear towards end of training.`

			`:param test_stats: dictionary of statistics collected during testing`
			`:type test_stats: dict of str -> any (but usually numbers)`
			`:param printunused: if True, print a list of all unused templates`
			`:type printunused: bool`
			`:return: None`
			`:rtype: None`
			`"""`
			`tids = [r.templateid for r in self._rules]`
			`train_stats = self.train_stats()`

			`trainscores = train_stats['rulescores']`
			`assert len(trainscores) == len(tids), (`
			`"corrupt statistics: "`
			`"{0} train scores for {1} rules".format(trainscores, tids)`
			`)`
			`template_counts = Counter(tids)`
			`weighted_traincounts = Counter()`
			`for (tid, score) in zip(tids, trainscores):`
			`weighted_traincounts[tid] += score`
			`tottrainscores = sum(trainscores)`

			`# det_tplsort() is for deterministic sorting;`
			`# the otherwise convenient Counter.most_common() unfortunately`
			`# does not break ties deterministically`
			`# between python versions and will break cross-version tests`
			`def det_tplsort(tpl_value):`
			`return (tpl_value[1], repr(tpl_value[0]))`

			`def print_train_stats():`
			`print(`
			`"TEMPLATE STATISTICS (TRAIN) {0} templates, {1} rules)".format(`
			`len(template_counts), len(tids)`
			`)`
			`)`
			`print(`
			`"TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "`
			`"final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)`
			`)`
			`head = "#ID \| Score (train) \| #Rules \| Template"`
			`print(head, "\n", "-" * len(head), sep="")`
			`train_tplscores = sorted(`
			`weighted_traincounts.items(), key=det_tplsort, reverse=True`
			`)`
			`for (tid, trainscore) in train_tplscores:`
			`s = "{0} \| {1:5d} {2:5.3f} \|{3:4d} {4:.3f} \| {5}".format(`
			`tid,`
			`trainscore,`
			`trainscore / tottrainscores,`
			`template_counts[tid],`
			`template_counts[tid] / len(tids),`
			`Template.ALLTEMPLATES[int(tid)],`
			`)`
			`print(s)`

			`def print_testtrain_stats():`
			`testscores = test_stats['rulescores']`
			`print(`
			`"TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(`
			`len(template_counts), len(tids)`
			`)`
			`)`
			`print(`
			`"TEST ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "`
			`"final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats)`
			`)`
			`print(`
			`"TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "`
			`"final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)`
			`)`
			`weighted_testcounts = Counter()`
			`for (tid, score) in zip(tids, testscores):`
			`weighted_testcounts[tid] += score`
			`tottestscores = sum(testscores)`
			`head = "#ID \| Score (test) \| Score (train) \| #Rules \| Template"`
			`print(head, "\n", "-" * len(head), sep="")`
			`test_tplscores = sorted(`
			`weighted_testcounts.items(), key=det_tplsort, reverse=True`
			`)`
			`for (tid, testscore) in test_tplscores:`
			`s = "{0:s} \|{1:5d} {2:6.3f} \| {3:4d} {4:.3f} \|{5:4d} {6:.3f} \| {7:s}".format(`
			`tid,`
			`testscore,`
			`testscore / tottestscores,`
			`weighted_traincounts[tid],`
			`weighted_traincounts[tid] / tottrainscores,`
			`template_counts[tid],`
			`template_counts[tid] / len(tids),`
			`Template.ALLTEMPLATES[int(tid)],`
			`)`
			`print(s)`

			`def print_unused_templates():`
			`usedtpls = set(int(tid) for tid in tids)`
			`unused = [`
			`(tid, tpl)`
			`for (tid, tpl) in enumerate(Template.ALLTEMPLATES)`
			`if tid not in usedtpls`
			`]`
			`print("UNUSED TEMPLATES ({0})".format(len(unused)))`

			`for (tid, tpl) in unused:`
			`print("{0:03d} {1:s}".format(tid, str(tpl)))`

			`if test_stats is None:`
			`print_train_stats()`
			`else:`
			`print_testtrain_stats()`
			`print()`
			`if printunused:`
			`print_unused_templates()`
			`print()`

			`def batch_tag_incremental(self, sequences, gold):`
			`"""`
			`Tags by applying each rule to the entire corpus (rather than all rules to a`
			`single sequence). The point is to collect statistics on the test set for`
			`individual rules.`

			`NOTE: This is inefficient (does not build any index, so will traverse the entire`
			`corpus N times for N rules) -- usually you would not care about statistics for`
			`individual rules and thus use batch_tag() instead`

			`:param sequences: lists of token sequences (sentences, in some applications) to be tagged`
			`:type sequences: list of list of strings`
			`:param gold: the gold standard`
			`:type gold: list of list of strings`
			`:returns: tuple of (tagged_sequences, ordered list of rule scores (one for each rule))`
			`"""`

			`def counterrors(xs):`
			`return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair))`

			`testing_stats = {}`
			`testing_stats['tokencount'] = sum(len(t) for t in sequences)`
			`testing_stats['sequencecount'] = len(sequences)`
			`tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences]`
			`testing_stats['initialerrors'] = counterrors(tagged_tokenses)`
			`testing_stats['initialacc'] = (`
			`1 - testing_stats['initialerrors'] / testing_stats['tokencount']`
			`)`
			`# Apply each rule to the entire corpus, in order`
			`errors = [testing_stats['initialerrors']]`
			`for rule in self._rules:`
			`for tagged_tokens in tagged_tokenses:`
			`rule.apply(tagged_tokens)`
			`errors.append(counterrors(tagged_tokenses))`
			`testing_stats['rulescores'] = [`
			`err0 - err1 for (err0, err1) in zip(errors, errors[1:])`
			`]`
			`testing_stats['finalerrors'] = errors[-1]`
			`testing_stats['finalacc'] = (`
			`1 - testing_stats['finalerrors'] / testing_stats['tokencount']`
			`)`
			`return (tagged_tokenses, testing_stats)`