Changed the English ('en') lemmatizer from spaCy to NLTK's PorterStemmer
This commit is contained in:
parent
427755d40b
commit
053ee2e040
@ -3,13 +3,14 @@
|
|||||||
from flask import Flask
|
from flask import Flask
|
||||||
from flask_restful import Api
|
from flask_restful import Api
|
||||||
from server.lemmatizer_resource import LemmatizerResource
|
from server.lemmatizer_resource import LemmatizerResource
|
||||||
import pickle, spacy
|
import pickle
|
||||||
|
from nltk.stem import PorterStemmer
|
||||||
|
|
||||||
app = Flask('preprocessor')
|
app = Flask('preprocessor')
|
||||||
api = Api(app)
|
api = Api(app)
|
||||||
|
|
||||||
|
|
||||||
lemmatizer = spacy.load('en', disable=['parser', 'ner'])
|
lemmatizer = PorterStemmer()
|
||||||
print("Lemmatizer initialized")
|
print("Lemmatizer initialized")
|
||||||
|
|
||||||
pickle_in = open("dictionaries/dict.pickle","rb")
|
pickle_in = open("dictionaries/dict.pickle","rb")
|
||||||
|
@ -42,7 +42,8 @@ class LemmatizerResource(Resource):
|
|||||||
if lemma is None:
|
if lemma is None:
|
||||||
lemma = word
|
lemma = word
|
||||||
elif language == 'en':
|
elif language == 'en':
|
||||||
doc = self.lemmatizer(word)
|
lemma = self.lemmatizer.stem(word)
|
||||||
lemma = doc[0].lemma_
|
if len(lemma) == 0:
|
||||||
|
lemma = word
|
||||||
return lemma
|
return lemma
|
||||||
|
|
||||||
|
15
tests/lemmatize_speed.py
Executable file
15
tests/lemmatize_speed.py
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import requests, json, time
|
||||||
|
|
||||||
|
|
||||||
|
def do_lemmatize(data, timeout=30):
    """POST ``data`` to the local lemmatizer service and return the decoded reply.

    Args:
        data: JSON-serialisable request payload; the caller in this script
            sends a dict with 'language' and 'sentences' keys.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled server cannot hang the script indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        url='http://127.0.0.1:10002/lemmatize',
        json=data,
        timeout=timeout,
    )
    # Surface HTTP errors instead of parsing (and timing) an error page as if
    # it were a successful lemmatization result.
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Time a single lemmatize request carrying 100 identical English sentences."""
    # Build the payload before starting the clock so only the request is timed.
    data = {
        'language': 'en',
        'sentences': 100 * ['this is just one of the sentences for testing'],
    }

    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted mid-measurement.
    start = time.perf_counter()
    do_lemmatize(data)
    elapsed = time.perf_counter() - start

    print("The operation took %.4f s" % elapsed)


# Guard the entry point so importing this module does not fire a request.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user