PCQRSCANER/venv/Lib/site-packages/nltk/corpus/reader/opinion_lexicon.py

# Natural Language Toolkit: Opinion Lexicon Corpus Reader
#
# Copyright (C) 2001-2019 NLTK Project
# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
CorpusReader for the Opinion Lexicon.

- Opinion Lexicon information -
Authors: Minqing Hu and Bing Liu, 2004.
    Department of Computer Science
    University of Illinois at Chicago

Contact: Bing Liu, liub@cs.uic.edu
        http://www.cs.uic.edu/~liub

Distributed with permission.

Related papers:
- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
    Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
    & Data Mining (KDD-04), Aug 22-25, 2004, Seattle, Washington, USA.

- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and
    Comparing Opinions on the Web". Proceedings of the 14th International World
    Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan.
"""
from six import string_types

from nltk.corpus.reader import WordListCorpusReader
from nltk.corpus.reader.api import *


class IgnoreReadmeCorpusView(StreamBackedCorpusView):
    """
    A corpus view whose content starts after the readme block found at the
    top of the corpus file.
    """

    def __init__(self, *args, **kwargs):
        """
        Build the view and move its starting offset past the readme block.

        All positional and keyword arguments are handed unchanged to
        ``StreamBackedCorpusView.__init__``.
        """
        super(IgnoreReadmeCorpusView, self).__init__(*args, **kwargs)
        # The stream must be open before anything can be read from it.
        self._open()
        # Read past the readme block; the text that is returned is discarded.
        read_blankline_block(self._stream)
        # Record the position reached after the readme as the single known
        # file position, so the view's content begins from there.
        content_start = self._stream.tell()
        self._filepos = [content_start]


class OpinionLexiconCorpusReader(WordListCorpusReader):
    """
    Reader for Liu and Hu opinion lexicon.  Blank lines and readme are ignored.

        >>> from nltk.corpus import opinion_lexicon
        >>> opinion_lexicon.words()
        ['2-faced', '2-faces', 'abnormal', 'abolish', ...]

    The OpinionLexiconCorpusReader provides shortcuts to retrieve positive/negative
    words:

        >>> opinion_lexicon.negative()
        ['2-faced', '2-faces', 'abnormal', 'abolish', ...]

    Note that words from `words()` method are sorted by file id, not alphabetically:

        >>> opinion_lexicon.words()[0:10]
        ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
        'abominate', 'abomination', 'abort', 'aborted']
        >>> sorted(opinion_lexicon.words())[0:10]
        ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
        'abominate', 'abomination', 'abort']
    """

    # View class used for every lexicon file; it starts reading after the
    # readme block at the top of the file.
    CorpusView = IgnoreReadmeCorpusView

    def words(self, fileids=None):
        """
        Return all words in the opinion lexicon. Note that these words are not
        sorted in alphabetical order: each file's words are kept in file order
        and the files are concatenated one after another.

        :param fileids: a list or regexp specifying the ids of the files whose
            words have to be returned. A single file id may be given as a
            string. If None, every file of the corpus is used.
        :return: the given file(s) as a list of words.
        :rtype: list(str)
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, string_types):
            # Accept a bare file id by wrapping it in a list.
            fileids = [fileids]
        return concat(
            [
                self.CorpusView(path, self._read_word_block, encoding=enc)
                for (path, enc, _fileid) in self.abspaths(fileids, True, True)
            ]
        )

    def positive(self):
        """
        Return all positive words, in the order in which they are listed in
        'positive-words.txt'.

        :return: a list of positive words.
        :rtype: list(str)
        """
        return self.words('positive-words.txt')

    def negative(self):
        """
        Return all negative words, in the order in which they are listed in
        'negative-words.txt'.

        :return: a list of negative words.
        :rtype: list(str)
        """
        return self.words('negative-words.txt')

    def _read_word_block(self, stream):
        """
        Read up to 20 lines from ``stream`` and return the words they contain.

        Each line is stripped of surrounding whitespace. Lines that are empty
        after stripping are skipped, so blank lines never show up as
        empty-string "words". Reading stops early at end of file.

        :param stream: an open stream positioned at the start of a line.
        :return: the words read in this block (possibly an empty list).
        :rtype: list(str)
        """
        words = []
        for _ in range(20):  # Read 20 lines at a time.
            line = stream.readline()
            if not line:
                # End of file: further readline() calls would return nothing.
                break
            word = line.strip()
            if word:
                words.append(word)
        return words
3 2019-12-22 21:51:47 +01:00			`# Natural Language Toolkit: Opinion Lexicon Corpus Reader`
			`#`
			`# Copyright (C) 2001-2019 NLTK Project`
			`# Author: Pierpaolo Pantone <24alsecondo@gmail.com>`
			`# URL: <http://nltk.org/>`
			`# For license information, see LICENSE.TXT`

			`"""`
			`CorpusReader for the Opinion Lexicon.`

			`- Opinion Lexicon information -`
			`Authors: Minqing Hu and Bing Liu, 2004.`
			`Department of Computer Science`
			`University of Illinois at Chicago`

			`Contact: Bing Liu, liub@cs.uic.edu`
			`http://www.cs.uic.edu/~liub`

			`Distributed with permission.`

			`Related papers:`
			`- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".`
			`Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery`
			`& Data Mining (KDD-04), Aug 22-25, 2004, Seattle, Washington, USA.`

			`- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and`
			`Comparing Opinions on the Web". Proceedings of the 14th International World`
			`Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan.`
			`"""`
			`from six import string_types`

			`from nltk.corpus.reader import WordListCorpusReader`
			`from nltk.corpus.reader.api import *`


			`class IgnoreReadmeCorpusView(StreamBackedCorpusView):`
			`"""`
			`This CorpusView is used to skip the initial readme block of the corpus.`
			`"""`

			`def __init__(self, *args, **kwargs):`
			`StreamBackedCorpusView.__init__(self, *args, **kwargs)`
			`# open self._stream`
			`self._open()`
			`# skip the readme block`
			`read_blankline_block(self._stream)`
			`# Set the initial position to the current stream position`
			`self._filepos = [self._stream.tell()]`


			`class OpinionLexiconCorpusReader(WordListCorpusReader):`
			`"""`
			`Reader for Liu and Hu opinion lexicon. Blank lines and readme are ignored.`

			`>>> from nltk.corpus import opinion_lexicon`
			`>>> opinion_lexicon.words()`
			`['2-faced', '2-faces', 'abnormal', 'abolish', ...]`

			`The OpinionLexiconCorpusReader provides shortcuts to retrieve positive/negative`
			`words:`

			`>>> opinion_lexicon.negative()`
			`['2-faced', '2-faces', 'abnormal', 'abolish', ...]`

			Note that words from `words()` method are sorted by file id, not alphabetically:

			`>>> opinion_lexicon.words()[0:10]`
			`['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',`
			`'abominate', 'abomination', 'abort', 'aborted']`
			`>>> sorted(opinion_lexicon.words())[0:10]`
			`['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',`
			`'abominate', 'abomination', 'abort']`
			`"""`

			`CorpusView = IgnoreReadmeCorpusView`

			`def words(self, fileids=None):`
			`"""`
			`Return all words in the opinion lexicon. Note that these words are not`
			`sorted in alphabetical order.`

			`:param fileids: a list or regexp specifying the ids of the files whose`
			`words have to be returned.`
			`:return: the given file(s) as a list of words and punctuation symbols.`
			`:rtype: list(str)`
			`"""`
			`if fileids is None:`
			`fileids = self._fileids`
			`elif isinstance(fileids, string_types):`
			`fileids = [fileids]`
			`return concat(`
			`[`
			`self.CorpusView(path, self._read_word_block, encoding=enc)`
			`for (path, enc, fileid) in self.abspaths(fileids, True, True)`
			`]`
			`)`

			`def positive(self):`
			`"""`
			`Return all positive words in alphabetical order.`

			`:return: a list of positive words.`
			`:rtype: list(str)`
			`"""`
			`return self.words('positive-words.txt')`

			`def negative(self):`
			`"""`
			`Return all negative words in alphabetical order.`

			`:return: a list of negative words.`
			`:rtype: list(str)`
			`"""`
			`return self.words('negative-words.txt')`

			`def _read_word_block(self, stream):`
			`words = []`
			`for i in range(20): # Read 20 lines at a time.`
			`line = stream.readline()`
			`if not line:`
			`continue`
			`words.append(line.strip())`
			`return words`