3 changed files with 0 additions and 10584 deletions
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/foo.py
+++ b/foo.py
@@ -1,160 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-# In[3]:
-
-
-import pathlib
-from collections import Counter
-from sklearn.metrics import *
-import pandas as pd
-
-
-# In[1]:
-
-
-import numpy as np, pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
-from sklearn.datasets import fetch_20newsgroups
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.pipeline import make_pipeline
-from sklearn.metrics import confusion_matrix, accuracy_score
-sns.set() # use seaborn plotting style
-
-
-# In[5]:
-
-
-train_x = pd.read_csv('train/in.tsv', header=None, sep='\t')
-train_y = pd.read_csv('train/expected.tsv', header=None, sep='\t')
-dev_x = pd.read_csv('dev-0/in.tsv', header=None, sep='\t')
-dev_y = pd.read_csv('dev-0/expected.tsv', header=None, sep='\t')
-test_x = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
-
-
-# In[61]:
-
-
-print(dev_y.shape)
-print(dev_x.shape)
-
-
-# In[11]:
-
-
-print(train_x[:15])
-
-
-# In[27]:
-
-
-print(train_x.shape)
-
-
-# In[49]:
-
-
-print(train_y.shape)
-
-
-# In[8]:
-
-
-print(train_y[:15])
-
-
-# In[53]:
-
-
-print(dev_x[:4])
-
-
-# In[119]:
-
-
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.feature_extraction.text import TfidfTransformer
-
-vec = CountVectorizer(stop_words='english')
-x1 = vec.fit_transform(train_x[:20000][0])
-tfidf_transformer = TfidfTransformer()
-
-x1_tf = tfidf_transformer.fit_transform(x1)
-
-
-# In[120]:
-
-
-# Build the model
-#model = make_pipeline(TfidfVectorizer(), MultinomialNB())
-clf = MultinomialNB().fit(x1_tf, train_y[:20000][0])
-
-
-# In[121]:
-
-
-# Train the model using the training data
-#model.fit(x1[:][0], train_y[:289541][0])
-# Predict the categories of the test data
-X_new_counts = vec.transform(dev_x[:][0])
-# We call transform instead of fit_transform because it's already been fit
-X_new_tfidf = tfidf_transformer.transform(X_new_counts)
-#predicted_categories = model.predict(dev_x[:][0])
-
-
-# In[122]:
-
-
-predicted = clf.predict(X_new_tfidf)
-
-
-# In[125]:
-
-
-print(predicted[:10])
-
-
-# In[126]:
-
-
-print(predicted.shape)
-
-
-# In[123]:
-
-
-#mat = confusion_matrix(dev_y[:][0],predicted_categories)
-
-print("The accuracy is {}".format(accuracy_score( dev_y[:][0],predicted_categories)))
- 
-
-
-# In[124]:
-
-
-print('We got an accuracy of',np.mean(predicted == dev_y[:][0])*100, '% over the test data.')
-
-
-# In[130]:
-
-
-np.savetxt("out.tsv",predicted, delimiter="\t", fmt='%d')
-
-
-# In[131]:
-
-
-X_test = vec.transform(test_x[:][0])
-# We call transform instead of fit_transform because it's already been fit
-X_tfidf_test = tfidf_transformer.transform(X_test)
-predicted_test = clf.predict(X_tfidf_test)
-np.savetxt("out.tsv",predicted_test, delimiter="\t", fmt='%d')
-
-
-# In[ ]:
-
-
-
-
--- a/test-A/out.tsv
+++ b/test-A/out.tsv