forked from kubapok/retroc2
ver 4 full train + stop words
This commit is contained in:
parent
78ae1c52a8
commit
ee39a6ffd5
40000
dev-0/out.tsv
40000
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
3
main.py
3
main.py
@ -3,6 +3,7 @@ import pandas as pd
|
|||||||
from sklearn.linear_model import LinearRegression
|
from sklearn.linear_model import LinearRegression
|
||||||
from sklearn.pipeline import make_pipeline
|
from sklearn.pipeline import make_pipeline
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from stop_words import get_stop_words
|
||||||
|
|
||||||
col_names = ['start_date', 'end_date', 'title', 'source', 'content']
|
col_names = ['start_date', 'end_date', 'title', 'source', 'content']
|
||||||
|
|
||||||
@ -17,7 +18,7 @@ X_dev = dev_set['content']
|
|||||||
X_test = test_set['content']
|
X_test = test_set['content']
|
||||||
|
|
||||||
print('Trenowanie modelu...')
|
print('Trenowanie modelu...')
|
||||||
model = make_pipeline(TfidfVectorizer(), LinearRegression())
|
model = make_pipeline(TfidfVectorizer(stop_words=get_stop_words('polish')), LinearRegression())
|
||||||
model.fit(X_train, y_train)
|
model.fit(X_train, y_train)
|
||||||
|
|
||||||
print('Predykcje...')
|
print('Predykcje...')
|
||||||
|
28440
test-A/out.tsv
28440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user