diff --git a/31_youtube_sentiment.py b/31_youtube_sentiment.py new file mode 100644 index 0000000..f0f2129 --- /dev/null +++ b/31_youtube_sentiment.py @@ -0,0 +1,77 @@ +import sys +import requests +from bs4 import BeautifulSoup as bs4 + +""" +Example usage: + +$ python 31_youtube_sentiment.py https://www.youtube.com/watch?v=_vrAjAHhUsA +""" + + +def get_arguments(): + if len(sys.argv) is 2: + return sys.argv[1] + else: + print('Specify at least 1 argument') + sys.exit() + + +def get_comments(url): + html = requests.get('https://plus.googleapis.com/u/0/_/widget/render/comments?first_party_property=YOUTUBE&href=' + url) + soup = bs4(html.text, 'html.parser') + return [comment.string for comment in soup.findAll('div', class_='Ct')] + + +def calculate_sentiment(comments): + positive = 0 + negative = 0 + negative_words = [ + 'hate', 'hated', 'dislike', 'disliked', 'awful', 'terrible', 'bad', + 'painful', 'worst', 'suck', 'rubbish', 'sad', 'sodding' + ] + positive_words = [ + 'love', 'loved', 'like', 'liked', 'awesome', 'amazing', 'good', + 'great', 'excellent', 'brilliant', 'cool' + ] + for comment in comments: + if comment is None: + continue + else: + for word in comment.split(' '): + if word in negative_words: + negative += 1 + if word in positive_words: + positive += 1 + return {'positive': positive, 'negative': negative} + + +def main(): + url = get_arguments() + if url: + comments = get_comments(url) + if len(comments) <= 0: + print('This video has no comments.') + sys.exit() + sentiment = calculate_sentiment(comments) + positive_score = sentiment['positive'] + negative_score = sentiment['negative'] + total_score = positive_score + negative_score + if positive_score > negative_score: + print('This video is generally positive:') + print('{0} positive / {1} total hits'.format( + positive_score, total_score)) + elif negative_score > positive_score: + print('This video is generally negative:') + print ('{0} negative / {1} total hits'.format( + negative_score, total_score)) + else: + print('This video is mutual:') + print('{0} positive {1} negative'.format( + positive_score, negative_score)) + else: + print('No url supplied') + + +if __name__ == '__main__': + main() diff --git a/readme.md b/readme.md index 0bfba2f..363d345 100644 --- a/readme.md +++ b/readme.md @@ -30,3 +30,4 @@ 1. **28_income_tax_calculator.py**: Income tax calculator via [Taxee](http://taxee.io/) 1. **29_json_to_yaml.py**: Convert JSON to YAML 1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API +1. **31_youtube_sentiment.py**: Calculate sentiment score from the comments of a Youtube video diff --git a/requirements.txt b/requirements.txt index 9d61831..b65ee68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +beautifulsoup4==4.4.1 PyYAML==3.11 requests==2.7.0 wheel==0.24.0