Fix make_prediction to iterate over the loaded test set instead of X_dev0

2022-05-27 21:33:35 +02:00 · 2022-05-27 21:33:35 +02:00 · f3f14088fb
commit f3f14088fb
parent c9207fce50
3 changed files with 119446 additions and 190287 deletions
--- a/run.py
+++ b/run.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8

-# In[208]:
+# In[1]:


 import pandas as pd
@ -9,26 +9,26 @@ import vowpalwabbit
 from sklearn import preprocessing


-# In[209]:
+# In[2]:


 vw = vowpalwabbit.Workspace('--oaa 20')


-# In[210]:
+# In[3]:


 X_train = pd.read_csv('train\in.tsv', sep='\t', usecols=[2], names=['text'])
 Y_train = pd.read_csv('train\expected.tsv', sep='\t', usecols=[0], names=['class'])


-# In[211]:
+# In[4]:


 Y_train['class'].unique()


-# In[212]:
+# In[5]:


 le = preprocessing.LabelEncoder()
@ -36,20 +36,20 @@ le.fit(['business', 'culture', 'lifestyle', 'news', 'opinion', 'removed', 'sport
 Y_train['class'] = le.fit_transform(Y_train['class'])


-# In[213]:
+# In[6]:


 for x, y in zip(X_train['text'], Y_train['class']):
    vw.learn(f'{y} | text:{x}')


-# In[216]:
+# In[16]:


 def make_prediction(path_in, path_out):
    test_set = pd.read_csv(path_in, sep='\t', usecols=[2], names=['text'])
    predictions = []
-    for x in X_dev0['text']:
+    for x in test_set['text']:
        predictions.append(vw.predict(f'| text:{x}'))
    predictions = le.inverse_transform(predictions)
    file = open(path_out, 'w')
@ -58,19 +58,19 @@ def make_prediction(path_in, path_out):
    file.close()


-# In[217]:
+# In[17]:


 make_prediction('dev-0\in.tsv', 'dev-0\out.tsv')


-# In[218]:
+# In[18]:


 make_prediction('test-A\in.tsv', 'test-A\out.tsv')


-# In[219]:
+# In[19]:


 make_prediction('test-B\in.tsv', 'test-B\out.tsv')
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
--- a/test-B/out.tsv
+++ b/test-B/out.tsv