Changed the English ('en') lemmatizer from spaCy to NLTK's PorterStemmer

This commit is contained in:
rjawor 2019-07-14 22:29:07 +02:00
parent 427755d40b
commit 053ee2e040
3 changed files with 21 additions and 4 deletions

View File

@ -3,13 +3,14 @@
from flask import Flask
from flask_restful import Api
from server.lemmatizer_resource import LemmatizerResource
import pickle, spacy
import pickle
from nltk.stem import PorterStemmer
# Flask application named 'preprocessor', wrapped in a flask_restful Api.
app = Flask('preprocessor')
api = Api(app)
# NOTE(review): this listing is a rendered diff with the +/- markers stripped,
# so both sides of the change are shown. The spaCy loader on the next line
# appears to be the removed side (the commit header reports 4 deletions) --
# confirm against the real file.
lemmatizer = spacy.load('en', disable=['parser', 'ner'])
# Added side: an NLTK PorterStemmer is bound to the same `lemmatizer` name.
# The name is kept even though the object is now a stemmer.
lemmatizer = PorterStemmer()
print("Lemmatizer initialized")
# Opens dictionaries/dict.pickle for binary reading. Whatever reads and closes
# this handle lies beyond the visible hunk.
pickle_in = open("dictionaries/dict.pickle","rb")

View File

@ -42,7 +42,8 @@ class LemmatizerResource(Resource):
if lemma is None:
lemma = word
elif language == 'en':
doc = self.lemmatizer(word)
lemma = doc[0].lemma_
lemma = self.lemmatizer.stem(word)
if len(lemma) == 0:
lemma = word
return lemma

15
tests/lemmatize_speed.py Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/python3
import requests, json, time
def do_lemmatize(data, url='http://127.0.0.1:10002/lemmatize', timeout=60):
    """Send *data* to the lemmatizer service and return the decoded JSON reply.

    Args:
        data: JSON-serialisable request payload. This script sends a dict
            with 'language' and 'sentences' keys.
        url: Endpoint to POST to. Defaults to the local service address.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive server cannot hang the script forever.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.HTTPError: the server replied with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: the server did not answer within
            *timeout* seconds.
    """
    response = requests.post(url=url, json=data, timeout=timeout)
    # Fail loudly on an error status so that a failed request is never
    # reported as a successfully timed operation.
    response.raise_for_status()
    return response.json()
# Build the payload before starting the clock so that only the request round
# trip is measured: 100 copies of one English sentence.
data = {'language':'en', 'sentences':100*['this is just one of the sentences for testing']}
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
do_lemmatize(data)
end = time.perf_counter()
print("The operation took %.4f s" % (end - start))