ver 4 full train + stop words

This commit is contained in:
Łukasz Jędyk 2021-05-13 11:16:26 +02:00
parent 78ae1c52a8
commit ee39a6ffd5
3 changed files with 34222 additions and 34221 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -3,6 +3,7 @@ import pandas as pd
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from stop_words import get_stop_words
col_names = ['start_date', 'end_date', 'title', 'source', 'content'] col_names = ['start_date', 'end_date', 'title', 'source', 'content']
@@ -17,7 +18,7 @@ X_dev = dev_set['content']
X_test = test_set['content'] X_test = test_set['content']
print('Trenowanie modelu...') print('Trenowanie modelu...')
model = make_pipeline(TfidfVectorizer(), LinearRegression()) model = make_pipeline(TfidfVectorizer(stop_words=get_stop_words('polish')), LinearRegression())
model.fit(X_train, y_train) model.fit(X_train, y_train)
print('Predykcje...') print('Predykcje...')

File diff suppressed because it is too large Load Diff