Compare commits
4 Commits
Author | SHA1 | Date |
---|---|---|
Adi | 97572efbcf | |
Adi | 6c3ca75b83 | |
Adi | dc2a76c237 | |
Adi | 8d0c0507e9 |
|
@ -0,0 +1,143 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Users\\Adrian\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3444: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
"b'Skipping line 25706: expected 2 fields, saw 3\\nSkipping line 58881: expected 2 fields, saw 3\\nSkipping line 73761: expected 2 fields, saw 3\\n'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||
"from sklearn.pipeline import make_pipeline\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"train/train.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"df = df.head(1000)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dev_x = pd.read_csv(\"dev-0/in.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n",
|
||||
" test = file.readlines()\n",
|
||||
"test = pd.Series(test)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"x = df[1]\n",
|
||||
"y = df[0]\n",
|
||||
"\n",
|
||||
"model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
|
||||
"model.fit(x,y)\n",
|
||||
"\n",
|
||||
"pred_dev = model.predict(dev_x[0])\n",
|
||||
"pred_dev = pd.Series(pred_dev)\n",
|
||||
"\n",
|
||||
"with open('dev-0/out.tsv', 'wt') as file:\n",
|
||||
" for pred in pred_dev:\n",
|
||||
" file.write(str(pred)+'\\n')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"pred_test = model.predict(test)\n",
|
||||
"pred_test = pd.Series(pred_test)\n",
|
||||
"pred_test = pred_test.astype('int')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"with open('test-A/out.tsv', 'wt') as file:\n",
|
||||
" for pred in pred_test:\n",
|
||||
" file.write(str(pred)+'\\n')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "3e2a9ef0-6da0-4934-8099-378d859ae04e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0\n",
|
||||
"0 ATP Sztokholm: Juergen Zopp wykorzystał szansę...\n",
|
||||
"1 Krowicki z reprezentacją kobiet aż do igrzysk ...\n",
|
||||
"2 Wielki powrót Łukasza Kubota Odradza się zawsz...\n",
|
||||
"3 Marcel Hirscher wygrał ostatni slalom gigant m...\n",
|
||||
"4 Polki do Czarnogóry z pełnią zaangażowania. Sy...\n",
|
||||
"... ...\n",
|
||||
"5440 Biało-czerwona siła w Falun. Oni będą reprezen...\n",
|
||||
"5441 Finał WTA Tokio na żywo: Woźniacka - Osaka LIV...\n",
|
||||
"5442 Oni zapisali się w annałach. Hubert Hurkacz 15...\n",
|
||||
"5443 Poprawia się stan Nikiego Laudy. Austriak może...\n",
|
||||
"5444 Liga Mistrzów. Zabójcza końcówka Interu Mediol...\n",
|
||||
"\n",
|
||||
"[5445 rows x 1 columns]\n",
|
||||
"0 ATP Sztokholm: Juergen Zopp wykorzystał szansę...\n",
|
||||
"1 Krowicki z reprezentacją kobiet aż do igrzysk ...\n",
|
||||
"2 Wielki powrót Łukasza Kubota Odradza się zawsz...\n",
|
||||
"3 Marcel Hirscher wygrał ostatni slalom gigant m...\n",
|
||||
"4 Polki do Czarnogóry z pełnią zaangażowania. Sy...\n",
|
||||
" ... \n",
|
||||
"5442 Biało-czerwona siła w Falun. Oni będą reprezen...\n",
|
||||
"5443 Finał WTA Tokio na żywo: Woźniacka - Osaka LIV...\n",
|
||||
"5444 Oni zapisali się w annałach. Hubert Hurkacz 15...\n",
|
||||
"5445 Poprawia się stan Nikiego Laudy. Austriak może...\n",
|
||||
"5446 Liga Mistrzów. Zabójcza końcówka Interu Mediol...\n",
|
||||
"Length: 5447, dtype: object\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(test)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(Xtest)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
|
||||
# Train a TF-IDF + multinomial naive Bayes text classifier on the training
# split and write one predicted label per input line for dev-0 and test-A.

# Number of leading training rows used to fit the model.
TRAIN_ROWS = 1000


def read_documents(path):
    """Return the lines of the UTF-8 text file at *path* as a pandas Series.

    The file is read line by line rather than through ``pd.read_csv`` so that
    every input line yields exactly one document, and therefore exactly one
    prediction. A CSV parser can drop lines it considers malformed, which
    leaves the output file shorter than the input file.
    """
    with open(path, 'r', encoding='utf8') as file:
        return pd.Series(file.readlines())


def write_predictions(path, predictions):
    """Write every element of *predictions* to *path*, one per line."""
    with open(path, 'wt') as file:
        file.writelines(str(pred) + '\n' for pred in predictions)


# ``error_bad_lines=False`` is deprecated; ``on_bad_lines="skip"`` is the
# replacement and likewise skips rows with too many fields.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="skip")
df = df.head(TRAIN_ROWS)

# Column 0 holds the label, column 1 the text.
x = df[1]
y = df[0]

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

# Both evaluation inputs are read the same way so that the number of
# predictions always matches the number of input lines.
dev_x = read_documents('dev-0/in.tsv')
test = read_documents('test-A/in.tsv')

pred_dev = pd.Series(model.predict(dev_x))
write_predictions('dev-0/out.tsv', pred_dev)

pred_test = pd.Series(model.predict(test))
pred_test = pred_test.astype('int')
write_predictions('test-A/out.tsv', pred_test)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Users\\Adrian\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3444: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
"b'Skipping line 25706: expected 2 fields, saw 3\\nSkipping line 58881: expected 2 fields, saw 3\\nSkipping line 73761: expected 2 fields, saw 3\\n'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||
"from sklearn.pipeline import make_pipeline\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"train/train.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dev_x = pd.read_csv(\"dev-0/in.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n",
|
||||
" test = file.readlines()\n",
|
||||
"test = pd.Series(test)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"x = df[1]\n",
|
||||
"y = df[0]\n",
|
||||
"\n",
|
||||
"model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
|
||||
"model.fit(x,y)\n",
|
||||
"\n",
|
||||
"pred_dev = model.predict(dev_x[0])\n",
|
||||
"pred_dev = pd.Series(pred_dev)\n",
|
||||
"\n",
|
||||
"with open('dev-0/out.tsv', 'wt') as file:\n",
|
||||
" for pred in pred_dev:\n",
|
||||
" file.write(str(pred)+'\\n')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"pred_test = model.predict(test)\n",
|
||||
"pred_test = pd.Series(pred_test)\n",
|
||||
"pred_test = pred_test.astype('int')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"with open('test-A/out.tsv', 'wt') as file:\n",
|
||||
" for pred in pred_test:\n",
|
||||
" file.write(str(pred)+'\\n')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,49 @@
|
|||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
|
||||
# Train a TF-IDF + multinomial naive Bayes text classifier on the training
# split and write one predicted label per input line for dev-0 and test-A.

# Number of leading training rows used to fit the model.
TRAIN_ROWS = 1000


def read_documents(path):
    """Return the lines of the UTF-8 text file at *path* as a pandas Series.

    The file is read line by line rather than through ``pd.read_csv`` so that
    every input line yields exactly one document, and therefore exactly one
    prediction. A CSV parser can drop lines it considers malformed, which
    leaves the output file shorter than the input file.
    """
    with open(path, 'r', encoding='utf8') as file:
        return pd.Series(file.readlines())


def write_predictions(path, predictions):
    """Write every element of *predictions* to *path*, one per line."""
    with open(path, 'wt') as file:
        file.writelines(str(pred) + '\n' for pred in predictions)


# ``error_bad_lines=False`` is deprecated; ``on_bad_lines="skip"`` is the
# replacement and likewise skips rows with too many fields.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="skip")
df = df.head(TRAIN_ROWS)

# Column 0 holds the label, column 1 the text.
x = df[1]
y = df[0]

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

# Both evaluation inputs are read the same way so that the number of
# predictions always matches the number of input lines.
dev_x = read_documents('dev-0/in.tsv')
test = read_documents('test-A/in.tsv')

pred_dev = pd.Series(model.predict(dev_x))
write_predictions('dev-0/out.tsv', pred_dev)

pred_test = pd.Series(model.predict(test))
pred_test = pred_test.astype('int')
write_predictions('test-A/out.tsv', pred_test)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue