Changed the English ('en') lemmatizer to NLTK's PorterStemmer
This commit is contained in:
parent
427755d40b
commit
053ee2e040
@ -3,13 +3,14 @@
|
||||
from flask import Flask
|
||||
from flask_restful import Api
|
||||
from server.lemmatizer_resource import LemmatizerResource
|
||||
import pickle, spacy
|
||||
import pickle
|
||||
from nltk.stem import PorterStemmer
|
||||
|
||||
app = Flask('preprocessor')
|
||||
api = Api(app)
|
||||
|
||||
|
||||
lemmatizer = spacy.load('en', disable=['parser', 'ner'])
|
||||
lemmatizer = PorterStemmer()
|
||||
print("Lemmatizer initialized")
|
||||
|
||||
pickle_in = open("dictionaries/dict.pickle","rb")
|
||||
|
@ -42,7 +42,8 @@ class LemmatizerResource(Resource):
|
||||
if lemma is None:
|
||||
lemma = word
|
||||
elif language == 'en':
|
||||
doc = self.lemmatizer(word)
|
||||
lemma = doc[0].lemma_
|
||||
lemma = self.lemmatizer.stem(word)
|
||||
if len(lemma) == 0:
|
||||
lemma = word
|
||||
return lemma
|
||||
|
||||
|
15
tests/lemmatize_speed.py
Executable file
15
tests/lemmatize_speed.py
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import requests, json, time
|
||||
|
||||
|
||||
def do_lemmatize(data, url='http://127.0.0.1:10002/lemmatize', timeout=60):
    """POST ``data`` as JSON to the lemmatize endpoint and return the decoded reply.

    Args:
        data: JSON-serialisable payload to send as the request body.
        url: Endpoint to call. Defaults to the local server address.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(url=url, json=data, timeout=timeout)
    # Surface HTTP errors directly instead of failing later while trying to
    # parse an error page as JSON.
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
# Build the payload before starting the clock so that only the HTTP round
# trip to the lemmatizer is measured, not the construction of the test data.
data = {'language': 'en', 'sentences': 100 * ['this is just one of the sentences for testing']}

# perf_counter() is monotonic and high resolution; time.time() follows the
# wall clock and can jump if the system time is adjusted mid-measurement.
start = time.perf_counter()
do_lemmatize(data)
end = time.perf_counter()

print("The operation took %.4f s" % (end - start))
|
Loading…
Reference in New Issue
Block a user