diff --git a/.ipynb_checkpoints/run-checkpoint.ipynb b/.ipynb_checkpoints/run-checkpoint.ipynb index 7e29419..326f602 100644 --- a/.ipynb_checkpoints/run-checkpoint.ipynb +++ b/.ipynb_checkpoints/run-checkpoint.ipynb @@ -52,14 +52,15 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "id": "ea8069f7-de8e-454c-8eac-9fb7cc0df626", "metadata": {}, "outputs": [], "source": [ "def jurisdiction(path_in, path_out): \n", " with open(path_in, 'r', encoding='utf8') as file:\n", - " lines = file.readlines() \n", + " lines = file.readlines()\n", + " lines = lines.replace('.', ' ').replace(',', ' ').lower()\n", " with open(path_out, 'wt')as file_out:\n", " for i in lines:\n", " file_out.write(\"jurisdiction=\"+str(state_prediction(i))+'\\n') \n", @@ -80,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "594a25a9-a0ce-4de9-82c8-df50a4ecac39", "metadata": {}, "outputs": [ @@ -89,7 +90,7 @@ "output_type": "stream", "text": [ "[NbConvertApp] Converting notebook run.ipynb to script\n", - "[NbConvertApp] Writing 1634 bytes to run.py\n" + "[NbConvertApp] Writing 1697 bytes to run.py\n" ] } ], diff --git a/.ipynb_checkpoints/run-checkpoint.py b/.ipynb_checkpoints/run-checkpoint.py index c872c65..d743e11 100644 --- a/.ipynb_checkpoints/run-checkpoint.py +++ b/.ipynb_checkpoints/run-checkpoint.py @@ -35,12 +35,13 @@ def state_prediction(text_in): return max(state_dict, key=state_dict.get) -# In[20]: +# In[23]: def jurisdiction(path_in, path_out): with open(path_in, 'r', encoding='utf8') as file: - lines = file.readlines() + lines = file.readlines() + lines = lines.replace('.', ' ').replace(',', ' ').lower() with open(path_out, 'wt')as file_out: for i in lines: file_out.write("jurisdiction="+str(state_prediction(i))+'\n') @@ -55,7 +56,7 @@ jurisdiction('train/in.tsv', 'train/out.tsv') jurisdiction('test-A/in.tsv', 'test-A/out.tsv') -# In[ ]: +# In[22]: # get_ipython().system('jupyter nbconvert --to script run.ipynb') diff --git a/run.ipynb b/run.ipynb index 7e29419..326f602 100644 --- a/run.ipynb +++ b/run.ipynb @@ -52,14 +52,15 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "id": "ea8069f7-de8e-454c-8eac-9fb7cc0df626", "metadata": {}, "outputs": [], "source": [ "def jurisdiction(path_in, path_out): \n", " with open(path_in, 'r', encoding='utf8') as file:\n", - " lines = file.readlines() \n", + " lines = file.readlines()\n", + " lines = lines.replace('.', ' ').replace(',', ' ').lower()\n", " with open(path_out, 'wt')as file_out:\n", " for i in lines:\n", " file_out.write(\"jurisdiction=\"+str(state_prediction(i))+'\\n') \n", @@ -80,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "594a25a9-a0ce-4de9-82c8-df50a4ecac39", "metadata": {}, "outputs": [ @@ -89,7 +90,7 @@ "output_type": "stream", "text": [ "[NbConvertApp] Converting notebook run.ipynb to script\n", - "[NbConvertApp] Writing 1634 bytes to run.py\n" + "[NbConvertApp] Writing 1697 bytes to run.py\n" ] } ], diff --git a/run.py b/run.py index c872c65..d743e11 100644 --- a/run.py +++ b/run.py @@ -35,12 +35,13 @@ def state_prediction(text_in): return max(state_dict, key=state_dict.get) -# In[20]: +# In[23]: def jurisdiction(path_in, path_out): with open(path_in, 'r', encoding='utf8') as file: - lines = file.readlines() + lines = file.readlines() + lines = lines.replace('.', ' ').replace(',', ' ').lower() with open(path_out, 'wt')as file_out: for i in lines: file_out.write("jurisdiction="+str(state_prediction(i))+'\n') @@ -55,7 +56,7 @@ jurisdiction('train/in.tsv', 'train/out.tsv') jurisdiction('test-A/in.tsv', 'test-A/out.tsv') -# In[ ]: +# In[22]: # get_ipython().system('jupyter nbconvert --to script run.ipynb')