final
This commit is contained in:
parent
9cb2fb2612
commit
8d0c0507e9
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
|
||||
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one predicted label per input line for the dev-0 and test-A sets.


def _read_lines(path):
    """Return the contents of *path* as a list of strings, one per line.

    The prediction inputs are read as raw lines instead of through
    ``pd.read_csv`` so that no row is dropped or merged by the CSV parser
    (extra tabs, quote characters, blank lines). Every input line must get
    exactly one prediction, otherwise the output file no longer lines up
    with the input file.
    """
    # newline="\n" turns off universal-newline translation so only "\n" ends
    # a record; a trailing "\r" left over from CRLF files is stripped below.
    with open(path, encoding="utf-8", newline="\n") as f:
        return [line.rstrip("\r\n") for line in f]


def _write_predictions(path, predictions):
    """Write each item of *predictions* to *path* on its own line."""
    with open(path, "wt", encoding="utf-8") as f:
        f.writelines(f"{pred}\n" for pred in predictions)


# Training data: column 0 is used as the label, column 1 as the text.
# `on_bad_lines="warn"` replaces the deprecated `error_bad_lines=False`:
# malformed training rows are still skipped and still reported.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="warn")

# Keep the inputs as single-column DataFrames so `dev_x[0]` / `test_x[0]`
# continue to work, but build them from raw lines so nothing is skipped.
dev_x = pd.DataFrame({0: _read_lines("dev-0/in.tsv")})
test_x = pd.DataFrame({0: _read_lines("test-A/in.tsv")})

x = df[1]
y = df[0]

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

pred_dev = model.predict(dev_x[0])
pred_test = model.predict(test_x[0])

_write_predictions("dev-0/out.tsv", pred_dev)
_write_predictions("test-A/out.tsv", pred_test)
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\Users\\Adrian\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3444: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
"b'Skipping line 25706: expected 2 fields, saw 3\\nSkipping line 58881: expected 2 fields, saw 3\\nSkipping line 73761: expected 2 fields, saw 3\\n'\n",
|
||||
"b'Skipping line 1983: expected 1 fields, saw 2\\nSkipping line 5199: expected 1 fields, saw 2\\n'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||
"from sklearn.pipeline import make_pipeline\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"train/train.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"dev_x = pd.read_csv(\"dev-0/in.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"test_x = pd.read_csv(\"test-A/in.tsv\", sep=\"\\t\", header=None, error_bad_lines=False)\n",
|
||||
"\n",
|
||||
"x = df[1]\n",
|
||||
"y = df[0]\n",
|
||||
"\n",
|
||||
"model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
|
||||
"model.fit(x,y)\n",
|
||||
"\n",
|
||||
"pred_dev = model.predict(dev_x[0])\n",
|
||||
"pred_test = model.predict(test_x[0])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open('dev-0/out.tsv', 'wt') as f:\n",
|
||||
" for pred in pred_dev:\n",
|
||||
" f.write(str(pred)+'\\n')\n",
|
||||
" \n",
|
||||
"with open('test-A/out.tsv', 'wt') as f:\n",
|
||||
" for pred in pred_test:\n",
|
||||
" f.write(str(pred)+'\\n')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e2a9ef0-6da0-4934-8099-378d859ae04e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,33 @@
|
|||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
|
||||
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one predicted label per input line for the dev-0 and test-A sets.


def _read_lines(path):
    """Return the contents of *path* as a list of strings, one per line.

    The prediction inputs are read as raw lines instead of through
    ``pd.read_csv`` so that no row is dropped or merged by the CSV parser
    (extra tabs, quote characters, blank lines). Every input line must get
    exactly one prediction, otherwise the output file no longer lines up
    with the input file.
    """
    # newline="\n" turns off universal-newline translation so only "\n" ends
    # a record; a trailing "\r" left over from CRLF files is stripped below.
    with open(path, encoding="utf-8", newline="\n") as f:
        return [line.rstrip("\r\n") for line in f]


def _write_predictions(path, predictions):
    """Write each item of *predictions* to *path* on its own line."""
    with open(path, "wt", encoding="utf-8") as f:
        f.writelines(f"{pred}\n" for pred in predictions)


# Training data: column 0 is used as the label, column 1 as the text.
# `on_bad_lines="warn"` replaces the deprecated `error_bad_lines=False`:
# malformed training rows are still skipped and still reported.
df = pd.read_csv("train/train.tsv", sep="\t", header=None, on_bad_lines="warn")

# Keep the inputs as single-column DataFrames so `dev_x[0]` / `test_x[0]`
# continue to work, but build them from raw lines so nothing is skipped.
dev_x = pd.DataFrame({0: _read_lines("dev-0/in.tsv")})
test_x = pd.DataFrame({0: _read_lines("test-A/in.tsv")})

x = df[1]
y = df[0]

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(x, y)

pred_dev = model.predict(dev_x[0])
pred_test = model.predict(test_x[0])

_write_predictions("dev-0/out.tsv", pred_dev)
_write_predictions("test-A/out.tsv", pred_test)
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue