From 2c5b3c6c96e8fdae99c5e87dff962c548d7d4c87 Mon Sep 17 00:00:00 2001 From: s440058 Date: Sun, 20 Jun 2021 20:05:17 +0200 Subject: [PATCH] add bayes --- Untitled.ipynb | 82 -------------------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 Untitled.ipynb diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index 92522ca..0000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,82 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "b'Skipping line 25706: expected 2 fields, saw 3\\nSkipping line 58881: expected 2 fields, saw 3\\nSkipping line 73761: expected 2 fields, saw 3\\n'\n", - "b'Skipping line 1983: expected 1 fields, saw 2\\nSkipping line 5199: expected 1 fields, saw 2\\n'\n" - ] - } - ], - "source": [ - "from sklearn.naive_bayes import GaussianNB\n", - "import pandas as pd\n", - "from sklearn.naive_bayes import MultinomialNB\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "\n", - "r_in = './train/train.tsv'\n", - "\n", - "r_ind_ev = './dev-0/in.tsv'\n", - "tsv_read = pd.read_table(r_in, error_bad_lines=False, sep='\\t', header=None)\n", - "tsv_read_dev = pd.read_table(r_ind_ev, error_bad_lines=False, sep='\\t', header=None)\n", - "\n", - "y_train = tsv_read[0].values\n", - "X_train = tsv_read[1].values\n", - "X_dev = tsv_read_dev[0].values\n", - "\n", - "vectorizer = TfidfVectorizer()\n", - "counts = vectorizer.fit_transform(X_train)\n", - "\n", - "\n", - "classifier = MultinomialNB()\n", - "classifier.fit(counts, y_train)\n", - "\n", - "counts2 = vectorizer.transform(X_dev)\n", - "predictions = classifier.predict(counts2)\n", - "\n", - "predictions.tofile(\"./dev-0/out.tsv\", sep='\\n')\n", - "\n", - "tsv_read_test_in = pd.read_table('./test-A/in.tsv', error_bad_lines=False, header= None)\n", - "X_test= tsv_read_test_in[0].values\n", - "\n", - "counts3 = vectorizer.transform(X_test)\n", - "predictions_test_A = classifier.predict(counts3)\n", - "predictions_test_A.tofile('./test-A/out.tsv', sep='\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}