{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Multinomial Naive Bayes text classification\n",
    "\n",
    "Bag-of-words counts (`CountVectorizer`) feed a `MultinomialNB` classifier. The model is fitted on `train/`, scored for accuracy on `dev-0`, and its predictions for `dev-0` and `test-A` are written to `out.tsv` in the respective directories."
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [],
   "source": [
    "#!/usr/bin/env python\n",
    "# coding: utf-8\n",
    "\n",
    "import lzma\n",
    "\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "\n",
    "\n",
    "def read_lines(path, opener=open):\n",
    "    \"\"\"Return every line of the UTF-8 text file at `path`, line endings included.\n",
    "\n",
    "    Args:\n",
    "        path: location of the file to read.\n",
    "        opener: callable used to open the file; `open` for plain text,\n",
    "            `lzma.open` for .xz archives.\n",
    "\n",
    "    Returns:\n",
    "        list[str]: the lines exactly as `readlines()` yields them.\n",
    "    \"\"\"\n",
    "    # The context manager closes the handle even if reading fails.\n",
    "    with opener(path, mode='rt', encoding='utf-8') as handle:\n",
    "        return handle.readlines()\n",
    "\n",
    "\n",
    "# Labels keep their trailing newline on purpose: the predictions are written\n",
    "# back verbatim with writelines(), which adds no separators of its own.\n",
    "X_train = read_lines(\"train/in.tsv.xz\", opener=lzma.open)\n",
    "y_train = read_lines(\"train/expected.tsv\")\n",
    "X_dev0 = read_lines(\"dev-0/in.tsv.xz\", opener=lzma.open)\n",
    "y_expected_dev0 = read_lines(\"dev-0/expected.tsv\")\n",
    "X_test = read_lines(\"test-A/in.tsv.xz\", opener=lzma.open)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "# The vocabulary is learned from the training lines only; dev-0 and test-A are\n",
    "# merely transformed with that fitted vocabulary.\n",
    "count_vect = CountVectorizer()\n",
    "X_train_counts = count_vect.fit_transform(X_train)\n",
    "X_dev0_counts = count_vect.transform(X_dev0)\n",
    "X_test_counts = count_vect.transform(X_test)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [],
   "source": [
    "# Fit on the training counts, then predict labels for both evaluation sets.\n",
    "clf = MultinomialNB().fit(X_train_counts, y_train)\n",
    "\n",
    "y_predicted_dev0_MNB = clf.predict(X_dev0_counts)\n",
    "y_predicted_test_MNB = clf.predict(X_test_counts)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy dev0: 0.8025417298937785\n"
     ]
    }
   ],
   "source": [
    "# Expected and predicted labels both still carry the line ending read from the\n",
    "# label files, so they are compared as read.\n",
    "accuracy_dev0_MNB = accuracy_score(y_expected_dev0, y_predicted_dev0_MNB)\n",
    "print(f\"Accuracy dev0: {accuracy_dev0_MNB}\")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [],
   "source": [
    "# writelines() emits each predicted label as-is, relying on the line ending it\n",
    "# kept from the label file. The with-block flushes and closes every output\n",
    "# file deterministically.\n",
    "for out_path, predictions in (\n",
    "    (\"dev-0/out.tsv\", y_predicted_dev0_MNB),\n",
    "    (\"test-A/out.tsv\", y_predicted_test_MNB),\n",
    "):\n",
    "    with open(out_path, mode='w', encoding='utf-8') as out_file:\n",
    "        out_file.writelines(predictions)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}