# coding: utf-8
# Natural Language Toolkit: vader
#
# Copyright (C) 2001-2019 NLTK Project
# Author: C.J. Hutto <Clayton.Hutto@gtri.gatech.edu>
#         Ewan Klein <ewan@inf.ed.ac.uk> (modifications)
#         Pierpaolo Pantone <24alsecondo@gmail.com> (modifications)
#         George Berry <geb97@cornell.edu> (modifications)
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
#
# Modifications to the original VADER code have been made in order to
# integrate it into NLTK. These have involved changes to
# ensure Python 3 compatibility, and refactoring to achieve greater modularity.
"""
If you use the VADER sentiment analysis tools, please cite:
Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
"""
import math
import re
import string
from itertools import product

import nltk.data
from .util import pairwise

##Constants##
# (empirically derived mean sentiment intensity rating increase for booster words)
B_INCR = 0.293
B_DECR = -0.293
# (empirically derived mean sentiment intensity rating increase for using
# ALLCAPs to emphasize a word)
C_INCR = 0.733
N_SCALAR = -0.74
# for removing punctuation
REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation)))
PUNC_LIST = [
    ".",
    "!",
    "?",
    ",",
    ";",
    ":",
    "-",
    "'",
    "\"",
    "!!",
    "!!!",
    "??",
    "???",
    "?!?",
    "!?!",
    "?!?!",
    "!?!?",
]
NEGATE = {
    "aint",
    "arent",
    "cannot",
    "cant",
    "couldnt",
    "darent",
    "didnt",
    "doesnt",
    "ain't",
    "aren't",
    "can't",
    "couldn't",
    "daren't",
    "didn't",
    "doesn't",
    "dont",
    "hadnt",
    "hasnt",
    "havent",
    "isnt",
    "mightnt",
    "mustnt",
    "neither",
    "don't",
    "hadn't",
    "hasn't",
    "haven't",
    "isn't",
    "mightn't",
    "mustn't",
    "neednt",
    "needn't",
    "never",
    "none",
    "nope",
    "nor",
    "not",
    "nothing",
    "nowhere",
    "oughtnt",
    "shant",
    "shouldnt",
    "uhuh",
    "wasnt",
    "werent",
    "oughtn't",
    "shan't",
    "shouldn't",
    "uh-uh",
    "wasn't",
    "weren't",
    "without",
    "wont",
    "wouldnt",
    "won't",
    "wouldn't",
    "rarely",
    "seldom",
    "despite",
}
# booster/dampener 'intensifiers' or 'degree adverbs'
# http://en.wiktionary.org/wiki/Category:English_degree_adverbs
BOOSTER_DICT = {
    "absolutely": B_INCR,
    "amazingly": B_INCR,
    "awfully": B_INCR,
    "completely": B_INCR,
    "considerably": B_INCR,
    "decidedly": B_INCR,
    "deeply": B_INCR,
    "effing": B_INCR,
    "enormously": B_INCR,
    "entirely": B_INCR,
    "especially": B_INCR,
    "exceptionally": B_INCR,
    "extremely": B_INCR,
    "fabulously": B_INCR,
    "flipping": B_INCR,
    "flippin": B_INCR,
    "fricking": B_INCR,
    "frickin": B_INCR,
    "frigging": B_INCR,
    "friggin": B_INCR,
    "fully": B_INCR,
    "fucking": B_INCR,
    "greatly": B_INCR,
    "hella": B_INCR,
    "highly": B_INCR,
    "hugely": B_INCR,
    "incredibly": B_INCR,
    "intensely": B_INCR,
    "majorly": B_INCR,
    "more": B_INCR,
    "most": B_INCR,
    "particularly": B_INCR,
    "purely": B_INCR,
    "quite": B_INCR,
    "really": B_INCR,
    "remarkably": B_INCR,
    "so": B_INCR,
    "substantially": B_INCR,
    "thoroughly": B_INCR,
    "totally": B_INCR,
    "tremendously": B_INCR,
    "uber": B_INCR,
    "unbelievably": B_INCR,
    "unusually": B_INCR,
    "utterly": B_INCR,
    "very": B_INCR,
    "almost": B_DECR,
    "barely": B_DECR,
    "hardly": B_DECR,
    "just enough": B_DECR,
    "kind of": B_DECR,
    "kinda": B_DECR,
    "kindof": B_DECR,
    "kind-of": B_DECR,
    "less": B_DECR,
    "little": B_DECR,
    "marginally": B_DECR,
    "occasionally": B_DECR,
    "partly": B_DECR,
    "scarcely": B_DECR,
    "slightly": B_DECR,
    "somewhat": B_DECR,
    "sort of": B_DECR,
    "sorta": B_DECR,
    "sortof": B_DECR,
    "sort-of": B_DECR,
}
# check for special case idioms using a sentiment-laden keyword known to SAGE
SPECIAL_CASE_IDIOMS = {
    "the shit": 3,
    "the bomb": 3,
    "bad ass": 1.5,
    "yeah right": -2,
    "cut the mustard": 2,
    "kiss of death": -1.5,
    "hand to mouth": -2,
}

##Static methods##
def negated(input_words, include_nt=True):
    """
    Determine if input contains negation words
    """
    neg_words = NEGATE
    if any(word.lower() in neg_words for word in input_words):
        return True
    if include_nt:
        if any("n't" in word.lower() for word in input_words):
            return True
    for first, second in pairwise(input_words):
        if second.lower() == "least" and first.lower() != "at":
            return True
    return False
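
# Illustrative examples (added; not in the upstream source):
#
#     negated(["not", "good"])          # True  -- "not" is in NEGATE
#     negated(["the", "least", "fun"])  # True  -- "least" not preceded by "at"
#     negated(["at", "least", "okay"])  # False
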
def normalize(score, alpha=15):
    """
    Normalize the score to be between -1 and 1 using an alpha that
    approximates the max expected value
    """
    norm_score = score / math.sqrt((score * score) + alpha)
    return norm_score
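
# Illustrative examples (added; not in the upstream source). The formula is
# score / sqrt(score**2 + alpha), so output saturates toward +/-1:
#
#     normalize(0)    # 0.0
#     normalize(4)    # ~0.718   (4 / sqrt(16 + 15))
#     normalize(-20)  # ~-0.982
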
def allcap_differential(words):
    """
    Check whether just some words in the input are ALL CAPS
    :param list words: The words to inspect
    :returns: `True` if some but not all items in `words` are ALL CAPS
    """
    is_different = False
    allcap_words = 0
    for word in words:
        if word.isupper():
            allcap_words += 1
    cap_differential = len(words) - allcap_words
    if 0 < cap_differential < len(words):
        is_different = True
    return is_different
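
# Illustrative examples (added; not in the upstream source):
#
#     allcap_differential(["GREAT", "movie"])  # True  -- mixed casing
#     allcap_differential(["GREAT", "MOVIE"])  # False -- everything is caps
#     allcap_differential(["great", "movie"])  # False -- nothing is caps
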
def scalar_inc_dec(word, valence, is_cap_diff):
    """
    Check if the preceding words increase, decrease, or negate/nullify the
    valence
    """
    scalar = 0.0
    word_lower = word.lower()
    if word_lower in BOOSTER_DICT:
        scalar = BOOSTER_DICT[word_lower]
        if valence < 0:
            scalar *= -1
        # check if booster/dampener word is in ALLCAPS (while others aren't)
        if word.isupper() and is_cap_diff:
            if valence > 0:
                scalar += C_INCR
            else:
                scalar -= C_INCR
    return scalar
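
# Illustrative examples (added; not in the upstream source):
#
#     scalar_inc_dec("very", 1.9, False)     # 0.293   (B_INCR)
#     scalar_inc_dec("VERY", 1.9, True)      # 1.026   (B_INCR + C_INCR)
#     scalar_inc_dec("barely", 1.9, False)   # -0.293  (B_DECR)
#     scalar_inc_dec("barely", -1.9, False)  # 0.293   (sign flips for negative valence)
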
class SentiText(object):
    """
    Identify sentiment-relevant string-level properties of input text.
    """

    def __init__(self, text):
        if not isinstance(text, str):
            # coerce non-str input to str; bytes are decoded explicitly,
            # since calling .encode() on a bytes object would raise
            # AttributeError
            text = text.decode('utf-8') if isinstance(text, bytes) else str(text)
        self.text = text
        self.words_and_emoticons = self._words_and_emoticons()
        # doesn't separate words from adjacent punctuation
        # (keeps emoticons & contractions)
        self.is_cap_diff = allcap_differential(self.words_and_emoticons)

    def _words_plus_punc(self):
        """
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
        """
        no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text)
        # removes punctuation (but loses emoticons & contractions)
        words_only = no_punc_text.split()
        # remove singletons
        words_only = set(w for w in words_only if len(w) > 1)
        # the product gives ('cat', ',') and (',', 'cat')
        punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)}
        punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)}
        words_punc_dict = punc_before
        words_punc_dict.update(punc_after)
        return words_punc_dict

    def _words_and_emoticons(self):
        """
        Removes leading and trailing punctuation
        Leaves contractions and most emoticons
        Does not preserve punc-plus-letter emoticons (e.g. :D)
        """
        wes = self.text.split()
        words_punc_dict = self._words_plus_punc()
        wes = [we for we in wes if len(we) > 1]
        for i, we in enumerate(wes):
            if we in words_punc_dict:
                wes[i] = words_punc_dict[we]
        return wes
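
# Illustrative sketch (added; not in the upstream source):
#
#     st = SentiText("The movie was GREAT!!!")
#     st.words_and_emoticons   # ['The', 'movie', 'was', 'GREAT']
#     st.is_cap_diff           # True -- some, but not all, tokens are ALL CAPS
#
# Punctuation-only emoticons such as ":)" pass through untouched; as the
# docstring above notes, punc-plus-letter emoticons such as ":D" do not.
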
class SentimentIntensityAnalyzer(object):
    """
    Give a sentiment intensity score to sentences.
    """

    def __init__(
        self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt"
    ):
        self.lexicon_file = nltk.data.load(lexicon_file)
        self.lexicon = self.make_lex_dict()

    def make_lex_dict(self):
        """
        Convert lexicon file to a dictionary
        """
        lex_dict = {}
        for line in self.lexicon_file.split('\n'):
            if not line.strip():
                # skip blank lines rather than crash while unpacking
                continue
            (word, measure) = line.strip().split('\t')[0:2]
            lex_dict[word] = float(measure)
        return lex_dict
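
    # Illustrative sketch (added; not in the upstream source): each lexicon
    # line is tab-separated, and only the first two fields (token and mean
    # valence) are used here, so a hypothetical line
    #
    #     "amazing\t2.8\t0.76667\t[3, 3, 2, 2, 4, 2, 3, 3, 3, 3]"
    #
    # would yield lex_dict["amazing"] == 2.8
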
    def polarity_scores(self, text):
        """
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative values are negative
        valence.
        """
        sentitext = SentiText(text)
        sentiments = []
        words_and_emoticons = sentitext.words_and_emoticons
        # enumerate() instead of list.index() so that a repeated token is
        # scored at its own position, not at its first occurrence's
        for i, item in enumerate(words_and_emoticons):
            valence = 0
            if (
                i < len(words_and_emoticons) - 1
                and item.lower() == "kind"
                and words_and_emoticons[i + 1].lower() == "of"
            ) or item.lower() in BOOSTER_DICT:
                sentiments.append(valence)
                continue
            sentiments = self.sentiment_valence(
                valence, sentitext, item, i, sentiments
            )
        sentiments = self._but_check(words_and_emoticons, sentiments)
        return self.score_valence(sentiments, text)

    def sentiment_valence(self, valence, sentitext, item, i, sentiments):
        is_cap_diff = sentitext.is_cap_diff
        words_and_emoticons = sentitext.words_and_emoticons
        item_lowercase = item.lower()
        if item_lowercase in self.lexicon:
            # get the sentiment valence
            valence = self.lexicon[item_lowercase]
            # check if sentiment laden word is in ALL CAPS (while others aren't)
            if item.isupper() and is_cap_diff:
                if valence > 0:
                    valence += C_INCR
                else:
                    valence -= C_INCR
            for start_i in range(0, 3):
                if (
                    i > start_i
                    and words_and_emoticons[i - (start_i + 1)].lower()
                    not in self.lexicon
                ):
                    # dampen the scalar modifier of preceding words and emoticons
                    # (excluding the ones that immediately precede the item) based
                    # on their distance from the current item.
                    s = scalar_inc_dec(
                        words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff
                    )
                    if start_i == 1 and s != 0:
                        s = s * 0.95
                    if start_i == 2 and s != 0:
                        s = s * 0.9
                    valence = valence + s
                    valence = self._never_check(
                        valence, words_and_emoticons, start_i, i
                    )
                    if start_i == 2:
                        valence = self._idioms_check(valence, words_and_emoticons, i)
                        # future work: consider other sentiment-laden idioms
                        # other_idioms =
                        # {"back handed": -2, "blow smoke": -2, "blowing smoke": -2,
                        #  "upper hand": 1, "break a leg": 2,
                        #  "cooking with gas": 2, "in the black": 2, "in the red": -2,
                        #  "on the ball": 2, "under the weather": -2}
            valence = self._least_check(valence, words_and_emoticons, i)
        sentiments.append(valence)
        return sentiments

    def _least_check(self, valence, words_and_emoticons, i):
        # check for negation case using "least"
        if (
            i > 1
            and words_and_emoticons[i - 1].lower() not in self.lexicon
            and words_and_emoticons[i - 1].lower() == "least"
        ):
            if (
                words_and_emoticons[i - 2].lower() != "at"
                and words_and_emoticons[i - 2].lower() != "very"
            ):
                valence = valence * N_SCALAR
        elif (
            i > 0
            and words_and_emoticons[i - 1].lower() not in self.lexicon
            and words_and_emoticons[i - 1].lower() == "least"
        ):
            valence = valence * N_SCALAR
        return valence

    def _but_check(self, words_and_emoticons, sentiments):
        # check for modification in sentiment due to contrastive conjunction 'but'
        if 'but' in words_and_emoticons or 'BUT' in words_and_emoticons:
            try:
                bi = words_and_emoticons.index('but')
            except ValueError:
                bi = words_and_emoticons.index('BUT')
            # enumerate() so a repeated score is rescaled at its own position,
            # not at its first occurrence's
            for si, sentiment in enumerate(sentiments):
                if si < bi:
                    sentiments[si] = sentiment * 0.5
                elif si > bi:
                    sentiments[si] = sentiment * 1.5
        return sentiments
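
    # Illustrative sketch (added; not in the upstream source): with
    # words_and_emoticons = ['good', 'but', 'bad'] and incoming sentiments
    # [1.9, 0, -2.5], scores before 'but' are halved and scores after it are
    # scaled by 1.5, giving [0.95, 0, -3.75].
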
    def _idioms_check(self, valence, words_and_emoticons, i):
        onezero = "{0} {1}".format(words_and_emoticons[i - 1], words_and_emoticons[i])
        twoonezero = "{0} {1} {2}".format(
            words_and_emoticons[i - 2],
            words_and_emoticons[i - 1],
            words_and_emoticons[i],
        )
        twoone = "{0} {1}".format(
            words_and_emoticons[i - 2], words_and_emoticons[i - 1]
        )
        threetwoone = "{0} {1} {2}".format(
            words_and_emoticons[i - 3],
            words_and_emoticons[i - 2],
            words_and_emoticons[i - 1],
        )
        threetwo = "{0} {1}".format(
            words_and_emoticons[i - 3], words_and_emoticons[i - 2]
        )
        sequences = [onezero, twoonezero, twoone, threetwoone, threetwo]
        for seq in sequences:
            if seq in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[seq]
                break
        if len(words_and_emoticons) - 1 > i:
            zeroone = "{0} {1}".format(
                words_and_emoticons[i], words_and_emoticons[i + 1]
            )
            if zeroone in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[zeroone]
        if len(words_and_emoticons) - 1 > i + 1:
            zeroonetwo = "{0} {1} {2}".format(
                words_and_emoticons[i],
                words_and_emoticons[i + 1],
                words_and_emoticons[i + 2],
            )
            if zeroonetwo in SPECIAL_CASE_IDIOMS:
                valence = SPECIAL_CASE_IDIOMS[zeroonetwo]
        # check for booster/dampener bi-grams such as 'sort of' or 'kind of'
        if threetwo in BOOSTER_DICT or twoone in BOOSTER_DICT:
            valence = valence + B_DECR
        return valence

    def _never_check(self, valence, words_and_emoticons, start_i, i):
        if start_i == 0:
            if negated([words_and_emoticons[i - 1]]):
                valence = valence * N_SCALAR
        if start_i == 1:
            if words_and_emoticons[i - 2] == "never" and (
                words_and_emoticons[i - 1] == "so"
                or words_and_emoticons[i - 1] == "this"
            ):
                valence = valence * 1.5
            elif negated([words_and_emoticons[i - (start_i + 1)]]):
                valence = valence * N_SCALAR
        if start_i == 2:
            if (
                words_and_emoticons[i - 3] == "never"
                and (
                    words_and_emoticons[i - 2] == "so"
                    or words_and_emoticons[i - 2] == "this"
                )
                or (
                    words_and_emoticons[i - 1] == "so"
                    or words_and_emoticons[i - 1] == "this"
                )
            ):
                valence = valence * 1.25
            elif negated([words_and_emoticons[i - (start_i + 1)]]):
                valence = valence * N_SCALAR
        return valence

    def _punctuation_emphasis(self, sum_s, text):
        # add emphasis from exclamation points and question marks
        ep_amplifier = self._amplify_ep(text)
        qm_amplifier = self._amplify_qm(text)
        punct_emph_amplifier = ep_amplifier + qm_amplifier
        return punct_emph_amplifier

    def _amplify_ep(self, text):
        # check for added emphasis resulting from exclamation points (up to 4 of them)
        ep_count = text.count("!")
        if ep_count > 4:
            ep_count = 4
        # (empirically derived mean sentiment intensity rating increase for
        # exclamation points)
        ep_amplifier = ep_count * 0.292
        return ep_amplifier

    def _amplify_qm(self, text):
        # check for added emphasis resulting from question marks (2 or 3+)
        qm_count = text.count("?")
        qm_amplifier = 0
        if qm_count > 1:
            if qm_count <= 3:
                # (empirically derived mean sentiment intensity rating increase for
                # question marks)
                qm_amplifier = qm_count * 0.18
            else:
                qm_amplifier = 0.96
        return qm_amplifier
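
    # Illustrative arithmetic (added; not in the upstream source):
    #
    #     self._amplify_ep("Great!!!")   # 3 * 0.292 = 0.876
    #     self._amplify_qm("Really??")   # 2 * 0.18  = 0.36
    #     self._amplify_qm("What?????")  # capped at 0.96 once the count exceeds 3
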
    def _sift_sentiment_scores(self, sentiments):
        # want separate positive versus negative sentiment scores
        pos_sum = 0.0
        neg_sum = 0.0
        neu_count = 0
        for sentiment_score in sentiments:
            if sentiment_score > 0:
                # compensates for neutral words that are counted as 1
                pos_sum += float(sentiment_score) + 1
            if sentiment_score < 0:
                # when used with math.fabs(), compensates for neutrals
                neg_sum += float(sentiment_score) - 1
            if sentiment_score == 0:
                neu_count += 1
        return pos_sum, neg_sum, neu_count
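
    # Illustrative sketch (added; not in the upstream source): sentiments
    # [1.9, -1.2, 0.0] sift to (pos_sum, neg_sum, neu_count) == (2.9, -2.2, 1);
    # each non-neutral score is pushed one unit away from zero so that neutral
    # words, counted as 1 apiece, sit on a comparable scale.
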
    def score_valence(self, sentiments, text):
        if sentiments:
            sum_s = float(sum(sentiments))
            # compute and add emphasis from punctuation in text
            punct_emph_amplifier = self._punctuation_emphasis(sum_s, text)
            if sum_s > 0:
                sum_s += punct_emph_amplifier
            elif sum_s < 0:
                sum_s -= punct_emph_amplifier
            compound = normalize(sum_s)
            # discriminate between positive, negative and neutral sentiment scores
            pos_sum, neg_sum, neu_count = self._sift_sentiment_scores(sentiments)
            if pos_sum > math.fabs(neg_sum):
                pos_sum += punct_emph_amplifier
            elif pos_sum < math.fabs(neg_sum):
                neg_sum -= punct_emph_amplifier
            total = pos_sum + math.fabs(neg_sum) + neu_count
            pos = math.fabs(pos_sum / total)
            neg = math.fabs(neg_sum / total)
            neu = math.fabs(neu_count / total)
        else:
            compound = 0.0
            pos = 0.0
            neg = 0.0
            neu = 0.0
        sentiment_dict = {
            "neg": round(neg, 3),
            "neu": round(neu, 3),
            "pos": round(pos, 3),
            "compound": round(compound, 4),
        }
        return sentiment_dict
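
# Minimal usage sketch (added; not in the upstream source). Assumes the
# vader_lexicon resource has been downloaded, e.g. via
# nltk.download('vader_lexicon'):
#
#     from nltk.sentiment.vader import SentimentIntensityAnalyzer
#     sia = SentimentIntensityAnalyzer()
#     sia.polarity_scores("VADER is smart, handsome, and funny!")
#     # {'neg': 0.0, 'neu': 0.248, 'pos': 0.752, 'compound': 0.8439}
#
# (score values as reported in the upstream VADER examples)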