Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
deda838b5a |
20000
dev-0/out.tsv
Normal file
20000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
89
linear.ipynb
Normal file
89
linear.ipynb
Normal file
@ -0,0 +1,89 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from stop_words import get_stop_words\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorizer = TfidfVectorizer(stop_words = get_stop_words('polish'))\n",
|
||||
"linear = LinearRegression()\n",
|
||||
"\n",
|
||||
"train = pd.read_csv('train/train.tsv', sep = \"\\t\", names = ['start_date', 'end_date', 'title', 'sort_title', 'data'])\n",
|
||||
"\n",
|
||||
"mean = (train['start_date'] + train['end_date']) / 2\n",
|
||||
"tv = vectorizer.fit_transform(train['data'])\n",
|
||||
"linear.fit(tv, mean)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def getData(directory):\n",
|
||||
" with open(directory, encoding=\"utf-8\") as file:\n",
|
||||
" return file.readlines()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tmp_dev = getData('dev-0/in.tsv')\n",
|
||||
"dataFrame_dev = pd.DataFrame(data = tmp_dev)\n",
|
||||
"evaluate_dev = linear.predict(vectorizer.transform(dataFrame_dev[0]))\n",
|
||||
"np.savetxt('dev-0/out.tsv', evaluate_dev, fmt='%f', delimiter='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tmp_test = getData('test-A/in.tsv')\n",
|
||||
"dataFrame_test = pd.DataFrame(data = tmp_test)\n",
|
||||
"evaluate_test = linear.predict(vectorizer.transform(dataFrame_test[0]))\n",
|
||||
"np.savetxt('test-A/out.tsv', evaluate_test, fmt='%f', delimiter='\\n')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
14220
test-A/out.tsv
Normal file
14220
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user