{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Naive Bayes text classifier\n",
    "\n",
    "Fits a TF-IDF + multinomial Naive Bayes pipeline on `train/in.tsv` / `train/expected.tsv` and writes the predicted labels for `dev-0` and `test-A` to an `out.tsv` file in each of those directories.\n",
    "\n",
    "All paths are relative to the directory the notebook is run from."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "import numpy as np\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getInput(path):\n",
    "    \"\"\"Return the lines of the UTF-8 text file at `path` as a list of strings.\n",
    "\n",
    "    Line terminators are kept, exactly as `readlines()` yields them.\n",
    "    \"\"\"\n",
    "    with open(path, encoding='utf-8') as f:\n",
    "        return f.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def writeOutput(path, predictions):\n",
    "    \"\"\"Write each item of `predictions` on its own line to the text file at `path`.\n",
    "\n",
    "    The file is written as UTF-8 (matching `getInput`) and any existing file\n",
    "    at `path` is overwritten.\n",
    "    \"\"\"\n",
    "    with open(path, 'w', encoding='utf-8') as result:\n",
    "        result.writelines(f'{label}\\n' for label in predictions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_in=getInput('./train/in.tsv')\n",
    "train_expected=getInput('./train/expected.tsv')\n",
    "test_in=getInput('./test-A/in.tsv')\n",
    "dev_in=getInput('./dev-0/in.tsv')\n",
    "dev_expected=getInput('./dev-0/expected.tsv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strip the line terminator so that a final line without '\\n' is not\n",
    "# treated as a separate class, and so labels can be written back verbatim.\n",
    "train_labels = [line.rstrip('\\n') for line in train_expected]\n",
    "\n",
    "# Keep the fitted encoder: it is needed to map predicted codes back to labels.\n",
    "label_encoder = LabelEncoder()\n",
    "encTransform = label_encoder.fit_transform(train_labels)\n",
    "\n",
    "pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
    "model = pipeline.fit(train_in, encTransform)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict() returns encoder codes; convert them back to the original labels.\n",
    "dev_predicted = label_encoder.inverse_transform(model.predict(dev_in))\n",
    "test_predicted = label_encoder.inverse_transform(model.predict(test_in))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "writeOutput('./dev-0/out.tsv', dev_predicted)\n",
    "writeOutput('./test-A/out.tsv', test_predicted)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export the notebook as a script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!jupyter nbconvert --to script Naiwny_bayes.ipynb"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}