Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

5 changed files with 0 additions and 45919 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

BIN
geval

Binary file not shown.

View File

@ -1,136 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import csv\n",
"from sklearn.linear_model import LinearRegression\n",
"from stop_words import get_stop_words\n",
"from sklearn.feature_extraction.text import TfidfVectorizer"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#trening\n",
"\n",
"#dane treningowe\n",
"train_data = pd.read_csv('train/train.tsv.xz', compression='xz', header=None, sep='\\t')\n",
"\n",
"#regresja liniowa\n",
"LR = LinearRegression()\n",
"#vectorizer\n",
"VEC = TfidfVectorizer(stop_words=get_stop_words('polish'))\n",
"#wektoryzacja danych treningowych\n",
"train_x = VEC.fit_transform(train_data[4])\n",
"#średnia dat\n",
"dm = (train_data[0] + train_data[1])/2\n",
"#trening\n",
"LR.fit(train_x, dm)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"#dev-0 predict\n",
"\n",
"#dane treningowe\n",
"dev0_data = pd.read_csv('dev-0/in.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"\n",
"#wektoryzacja danych treningowych\n",
"dev0_x = VEC.transform(dev0_data[0])\n",
"#predykcja\n",
"dev0_y = LR.predict(dev0_x)\n",
"#zapis wyników\n",
"dev0_y.tofile('dev-0/out.tsv', sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#dev-1 predict\n",
"\n",
"#dane treningowe\n",
"dev1_data = pd.read_csv('dev-1/in.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"\n",
"#wektoryzacja danych treningowych\n",
"dev1_x = VEC.transform(dev1_data[0])\n",
"#predykcja\n",
"dev1_y = LR.predict(dev1_x)\n",
"#zapis wyników\n",
"dev1_y.tofile('dev-1/out.tsv', sep='\\n')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#test-A predict\n",
"\n",
"#dane treningowe\n",
"testA_data = pd.read_csv('test-A/in.tsv', header=None, error_bad_lines=False, quoting=csv.QUOTE_NONE, sep='\\t')\n",
"\n",
"#wektoryzacja danych treningowych\n",
"testA_x = VEC.transform(testA_data[0])\n",
"#predykcja\n",
"testA_y = LR.predict(testA_x)\n",
"#zapis wyników\n",
"testA_y.tofile('test-A/out.tsv', sep='\\n')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
},
"metadata": {
"interpreter": {
"hash": "d4bdc0d8028da516e3b937f3ab23da3f18f7264589053952c883afefa2219368"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large Load Diff