forked from kubapok/retroc2
ver 4 full train + stop words
This commit is contained in:
parent
78ae1c52a8
commit
ee39a6ffd5
40000
dev-0/out.tsv
40000
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
3
main.py
3
main.py
@@ -3,6 +3,7 @@ import pandas as pd
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
+from stop_words import get_stop_words
|
||||
|
||||
col_names = ['start_date', 'end_date', 'title', 'source', 'content']
|
||||
|
||||
@@ -17,7 +18,7 @@ X_dev = dev_set['content']
|
||||
X_test = test_set['content']
|
||||
|
||||
print('Trenowanie modelu...')
|
||||
-model = make_pipeline(TfidfVectorizer(), LinearRegression())
|
||||
+model = make_pipeline(TfidfVectorizer(stop_words=get_stop_words('polish')), LinearRegression())
|
||||
model.fit(X_train, y_train)
|
||||
|
||||
print('Predykcje...')
|
||||
|
28440
test-A/out.tsv
28440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user