Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

11 changed files with 0 additions and 131165 deletions

View File

@@ -1,143 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"\n",
"# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv\n",
"# and write one prediction per input line to dev-0/out.tsv and test-A/out.tsv.\n",
"\n",
"# NOTE(review): only the first TRAIN_ROWS rows of the training file are used\n",
"# for fitting -- confirm this cap is intentional.\n",
"TRAIN_ROWS = 1000\n",
"\n",
"\n",
"def read_lines(path):\n",
"    \"\"\"Return the lines of the UTF-8 text file at `path` as a pandas Series.\n",
"\n",
"    Each line (trailing newline included) becomes one element, so the Series\n",
"    has exactly as many entries as the file has lines.\n",
"    \"\"\"\n",
"    with open(path, 'r', encoding='utf8') as file:\n",
"        return pd.Series(file.readlines())\n",
"\n",
"\n",
"def write_predictions(path, predictions):\n",
"    \"\"\"Write each item of `predictions` to `path`, one per line.\"\"\"\n",
"    with open(path, 'wt') as file:\n",
"        for pred in predictions:\n",
"            file.write(str(pred) + '\\n')\n",
"\n",
"\n",
"# on_bad_lines=\"warn\" is the replacement for the deprecated\n",
"# error_bad_lines=False: rows with an unexpected number of fields are\n",
"# reported and skipped instead of aborting the load.\n",
"df = pd.read_csv(\"train/train.tsv\", sep=\"\\t\", header=None, on_bad_lines=\"warn\")\n",
"df = df.head(TRAIN_ROWS)\n",
"\n",
"# Both evaluation inputs are read line by line. Parsing dev-0/in.tsv with\n",
"# read_csv can skip malformed rows, which would leave dev-0/out.tsv with fewer\n",
"# lines than its input.\n",
"# NOTE(review): the whole line is now passed to the model for dev-0, as was\n",
"# already done for test-A -- confirm in.tsv holds a single text column.\n",
"dev_x = read_lines('dev-0/in.tsv').to_frame()  # keeps `dev_x[0]` as the text column\n",
"test = read_lines('test-A/in.tsv')\n",
"\n",
"x = df[1]  # column 1 is used as the input text\n",
"y = df[0]  # column 0 is used as the label\n",
"\n",
"model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
"model.fit(x, y)\n",
"\n",
"pred_dev = pd.Series(model.predict(dev_x[0]))\n",
"write_predictions('dev-0/out.tsv', pred_dev)\n",
"\n",
"# NOTE(review): only the test predictions are cast to int before writing;\n",
"# kept as in the original -- confirm whether dev-0 should be cast as well.\n",
"pred_test = pd.Series(model.predict(test)).astype('int')\n",
"write_predictions('test-A/out.tsv', pred_test)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3e2a9ef0-6da0-4934-8099-378d859ae04e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0\n",
"0 ATP Sztokholm: Juergen Zopp wykorzystał szansę...\n",
"1 Krowicki z reprezentacją kobiet aż do igrzysk ...\n",
"2 Wielki powrót Łukasza Kubota Odradza się zawsz...\n",
"3 Marcel Hirscher wygrał ostatni slalom gigant m...\n",
"4 Polki do Czarnogóry z pełnią zaangażowania. Sy...\n",
"... ...\n",
"5440 Biało-czerwona siła w Falun. Oni będą reprezen...\n",
"5441 Finał WTA Tokio na żywo: Woźniacka - Osaka LIV...\n",
"5442 Oni zapisali się w annałach. Hubert Hurkacz 15...\n",
"5443 Poprawia się stan Nikiego Laudy. Austriak może...\n",
"5444 Liga Mistrzów. Zabójcza końcówka Interu Mediol...\n",
"\n",
"[5445 rows x 1 columns]\n",
"0 ATP Sztokholm: Juergen Zopp wykorzystał szansę...\n",
"1 Krowicki z reprezentacją kobiet aż do igrzysk ...\n",
"2 Wielki powrót Łukasza Kubota Odradza się zawsz...\n",
"3 Marcel Hirscher wygrał ostatni slalom gigant m...\n",
"4 Polki do Czarnogóry z pełnią zaangażowania. Sy...\n",
" ... \n",
"5442 Biało-czerwona siła w Falun. Oni będą reprezen...\n",
"5443 Finał WTA Tokio na żywo: Woźniacka - Osaka LIV...\n",
"5444 Oni zapisali się w annałach. Hubert Hurkacz 15...\n",
"5445 Poprawia się stan Nikiego Laudy. Austriak może...\n",
"5446 Liga Mistrzów. Zabójcza końcówka Interu Mediol...\n",
"Length: 5447, dtype: object\n"
]
}
],
"source": [
"# Sanity check: load test-A/in.tsv with pandas' CSV parser and print it next to\n",
"# `test`, the line-by-line version built in the cell above.\n",
"# The previous version printed `Xtest`, which is not defined anywhere in this\n",
"# notebook and fails on a fresh kernel.\n",
"# NOTE(review): the output recorded below comes from an earlier kernel session and\n",
"# shows 5445 rows for the DataFrame against 5447 entries for the Series --\n",
"# presumably the CSV parser drops or merges some lines. Re-run to confirm.\n",
"test_csv = pd.read_csv(\"test-A/in.tsv\", sep=\"\\t\", header=None, on_bad_lines=\"warn\")\n",
"print(test_csv)\n",
"print(test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,49 +0,0 @@
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one prediction per input line to dev-0/out.tsv and test-A/out.tsv.

# NOTE(review): only the first TRAIN_ROWS rows of the training file are used
# for fitting -- confirm this cap is intentional.
TRAIN_ROWS = 1000


def read_lines(path):
    """Return the lines of the UTF-8 text file at `path` as a pandas Series.

    Each line (trailing newline included) becomes one element, so the Series
    has exactly as many entries as the file has lines.
    """
    with open(path, 'r', encoding='utf8') as file:
        return pd.Series(file.readlines())


def write_predictions(path, predictions):
    """Write each item of `predictions` to `path`, one per line."""
    with open(path, 'wt') as file:
        for pred in predictions:
            file.write(str(pred) + '\n')


# on_bad_lines="warn" is the replacement for the deprecated
# error_bad_lines=False: rows with an unexpected number of fields are
# reported and skipped instead of aborting the load.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="warn")
df = df.head(TRAIN_ROWS)

# Both evaluation inputs are read line by line. Parsing dev-0/in.tsv with
# read_csv can skip malformed rows, which would leave dev-0/out.tsv with fewer
# lines than its input.
# NOTE(review): the whole line is now passed to the model for dev-0, as was
# already done for test-A -- confirm in.tsv holds a single text column.
dev_x = read_lines('dev-0/in.tsv').to_frame()  # keeps `dev_x[0]` as the text column
test = read_lines('test-A/in.tsv')

x = df[1]  # column 1 is used as the input text
y = df[0]  # column 0 is used as the label

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

pred_dev = pd.Series(model.predict(dev_x[0]))
write_predictions('dev-0/out.tsv', pred_dev)

# NOTE(review): only the test predictions are cast to int before writing;
# kept as in the original -- confirm whether dev-0 should be cast as well.
pred_test = pd.Series(model.predict(test)).astype('int')
write_predictions('test-A/out.tsv', pred_test)

View File

@@ -1,95 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"\n",
"# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv\n",
"# and write one prediction per input line to dev-0/out.tsv and test-A/out.tsv.\n",
"\n",
"\n",
"def read_lines(path):\n",
"    \"\"\"Return the lines of the UTF-8 text file at `path` as a pandas Series.\n",
"\n",
"    Each line (trailing newline included) becomes one element, so the Series\n",
"    has exactly as many entries as the file has lines.\n",
"    \"\"\"\n",
"    with open(path, 'r', encoding='utf8') as file:\n",
"        return pd.Series(file.readlines())\n",
"\n",
"\n",
"def write_predictions(path, predictions):\n",
"    \"\"\"Write each item of `predictions` to `path`, one per line.\"\"\"\n",
"    with open(path, 'wt') as file:\n",
"        for pred in predictions:\n",
"            file.write(str(pred) + '\\n')\n",
"\n",
"\n",
"# on_bad_lines=\"warn\" is the replacement for the deprecated\n",
"# error_bad_lines=False: rows with an unexpected number of fields are\n",
"# reported and skipped instead of aborting the load.\n",
"df = pd.read_csv(\"train/train.tsv\", sep=\"\\t\", header=None, on_bad_lines=\"warn\")\n",
"\n",
"# Both evaluation inputs are read line by line. Parsing dev-0/in.tsv with\n",
"# read_csv can skip malformed rows, which would leave dev-0/out.tsv with fewer\n",
"# lines than its input.\n",
"# NOTE(review): the whole line is now passed to the model for dev-0, as was\n",
"# already done for test-A -- confirm in.tsv holds a single text column.\n",
"dev_x = read_lines('dev-0/in.tsv').to_frame()  # keeps `dev_x[0]` as the text column\n",
"test = read_lines('test-A/in.tsv')\n",
"\n",
"x = df[1]  # column 1 is used as the input text\n",
"y = df[0]  # column 0 is used as the label\n",
"\n",
"model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
"model.fit(x, y)\n",
"\n",
"pred_dev = pd.Series(model.predict(dev_x[0]))\n",
"write_predictions('dev-0/out.tsv', pred_dev)\n",
"\n",
"# NOTE(review): only the test predictions are cast to int before writing;\n",
"# kept as in the original -- confirm whether dev-0 should be cast as well.\n",
"pred_test = pd.Series(model.predict(test)).astype('int')\n",
"write_predictions('test-A/out.tsv', pred_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

49
run.py
View File

@@ -1,49 +0,0 @@
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one prediction per input line to dev-0/out.tsv and test-A/out.tsv.

# NOTE(review): only the first TRAIN_ROWS rows of the training file are used
# for fitting -- confirm this cap is intentional.
TRAIN_ROWS = 1000


def read_lines(path):
    """Return the lines of the UTF-8 text file at `path` as a pandas Series.

    Each line (trailing newline included) becomes one element, so the Series
    has exactly as many entries as the file has lines.
    """
    with open(path, 'r', encoding='utf8') as file:
        return pd.Series(file.readlines())


def write_predictions(path, predictions):
    """Write each item of `predictions` to `path`, one per line."""
    with open(path, 'wt') as file:
        for pred in predictions:
            file.write(str(pred) + '\n')


# on_bad_lines="warn" is the replacement for the deprecated
# error_bad_lines=False: rows with an unexpected number of fields are
# reported and skipped instead of aborting the load.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="warn")
df = df.head(TRAIN_ROWS)

# Both evaluation inputs are read line by line. Parsing dev-0/in.tsv with
# read_csv can skip malformed rows, which would leave dev-0/out.tsv with fewer
# lines than its input.
# NOTE(review): the whole line is now passed to the model for dev-0, as was
# already done for test-A -- confirm in.tsv holds a single text column.
dev_x = read_lines('dev-0/in.tsv').to_frame()  # keeps `dev_x[0]` as the text column
test = read_lines('test-A/in.tsv')

x = df[1]  # column 1 is used as the input text
y = df[0]  # column 0 is used as the label

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

pred_dev = pd.Series(model.predict(dev_x[0]))
write_predictions('dev-0/out.tsv', pred_dev)

# NOTE(review): only the test predictions are cast to int before writing;
# kept as in the original -- confirm whether dev-0 should be cast as well.
pred_test = pd.Series(model.predict(test)).astype('int')
write_predictions('test-A/out.tsv', pred_test)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff