import json
import pickle
import re

import requests
from flask import request
from flask_restful import Resource


class LemmatizerResource(Resource):
    """REST resource that lemmatizes whitespace-separated sentences.

    Polish ('pl') words are looked up in ``dictionary``; English ('en') words
    are passed to ``lemmatizer.stem``.  Both collaborators are injected as
    keyword arguments when the resource is constructed.
    """

    # Language codes accepted in the request's 'language' field.
    SUPPORTED_LANGUAGES = ('pl', 'en')

    # Types accepted as a sentence.  ``type(u'')`` is ``str`` on Python 3 and
    # ``unicode`` on Python 2, so text decoded from JSON passes on either.
    _TEXT_TYPES = (str, type(u''))

    def __init__(self, **kwargs):
        """Store the injected collaborators.

        Required keyword arguments:
            lemmatizer: object whose ``stem(word)`` method is used for English.
            dictionary: mapping queried with ``get(word)`` for Polish.

        Raises:
            KeyError: if either keyword argument is missing.
        """
        self.lemmatizer = kwargs['lemmatizer']
        self.dictionary = kwargs['dictionary']

    def post(self):
        """Lemmatize every sentence in the JSON request body.

        Expected body: ``{"language": "pl" | "en", "sentences": [str, ...]}``.

        Returns:
            ``({'processed_sentences': [...]}, 200)`` on success, where each
            entry is the dict produced by ``processSentence``; otherwise
            ``({'error': message}, 400)`` describing the invalid input.
        """
        json_data = request.get_json(force=True)

        # Valid JSON is not necessarily an object (e.g. ``null`` or ``[1]``);
        # reject those up front instead of failing on the lookups below.
        if not isinstance(json_data, dict):
            return {'error': 'Request body must be a JSON object'}, 400

        for parameter in ('language', 'sentences'):
            if parameter not in json_data:
                return {'error': 'Missing parameter: %s' % parameter}, 400

        language = json_data['language']
        if language not in self.SUPPORTED_LANGUAGES:
            return {'error': 'Unsupported language: %s' % language}, 400

        sentences = json_data['sentences']
        # A bare string would otherwise be iterated character by character,
        # and non-string items would crash on ``.split()``.
        if not isinstance(sentences, list) or not all(
                isinstance(sentence, self._TEXT_TYPES)
                for sentence in sentences):
            return {
                'error':
                    'Invalid parameter: sentences must be a list of strings'
            }, 400

        processed = [
            self.processSentence(sentence, language) for sentence in sentences
        ]
        return {'processed_sentences': processed}, 200

    def processSentence(self, sentence, language):
        """Lemmatize one sentence.

        The sentence is split on whitespace and each token is lemmatized
        independently.

        Returns:
            dict with:
                'tokens': the lemmatized tokens joined by single spaces.
                'isFirstLemmatized': result of ``isFirstLemmatized`` for the
                    raw (pre-lemmatization) tokens.
        """
        raw_tokens = sentence.split()
        lemmas = [self.lemmatizeWord(token, language) for token in raw_tokens]
        return {
            'tokens': ' '.join(lemmas),
            'isFirstLemmatized': self.isFirstLemmatized(raw_tokens, language),
        }

    def isFirstLemmatized(self, raw_tokens, language):
        """Report whether the first token is unchanged by lemmatization.

        Returns False only when ``language`` is 'pl', the sentence has at
        least one token, and lemmatizing the first token yields a different
        string.  Returns True in every other case (any other language, or an
        empty token list).

        NOTE(review): the flag is True when the first token already equals
        its lemma -- confirm this is what API consumers expect from the name.
        """
        if language != 'pl' or not raw_tokens:
            return True
        first_token = raw_tokens[0]
        return self.lemmatizeWord(first_token, language) == first_token

    def lemmatizeWord(self, word, language):
        """Return the lemma of ``word``, or ``word`` itself when none is found.

        Single-character words are returned untouched.  For 'pl' the lemma is
        looked up in ``self.dictionary``; for 'en' it is produced by
        ``self.lemmatizer.stem``.  Whenever no non-empty lemma results
        (unknown word, empty stem, or an unrecognised language code) the
        original word is returned, so callers never receive an empty or
        ``None`` token.
        """
        if len(word) == 1:
            return word

        lemma = None
        if language == 'pl':
            lemma = self.dictionary.get(word)
        elif language == 'en':
            lemma = self.lemmatizer.stem(word)

        return lemma if lemma else word