challenging-america-word-ga.../trigram.ipynb

1 line
60 KiB
Plaintext
Raw Normal View History

2023-05-10 00:37:23 +02:00
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20920,"status":"ok","timestamp":1682226874288,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"pFXwuw2YtOWN","outputId":"ddb140d5-77c5-41a4-fe8a-7391e5846171"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":27,"status":"ok","timestamp":1682226874290,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"Cw_AulzZuagH","outputId":"559b5da8-3d20-4330-f7a6-26cf2b8d5255"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/drive/MyDrive\n"]}],"source":["cd drive/MyDrive"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20,"status":"ok","timestamp":1682226874292,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"EvEOrP78ubFh","outputId":"8c8bad1b-1f10-4ff0-f4f4-958e5b66240b"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/drive/MyDrive/challenging-america-word-gap-prediction\n"]}],"source":["cd challenging-america-word-gap-prediction/"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":20,"status":"ok","timestamp":1682226874297,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"0AOFd9SzukED"},"outputs":[],"source":["import pandas as pd"]},{"cell_type":"code","execution_count":5,"metadata":{"executionInfo":{"elapsed":45413,"status":"ok","timestamp":1682226919691,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"rJ8liwTuujTr"},"outputs":[],"source":["data = pd.read_csv(\"train/in.tsv.xz\", sep=\"\\t\", on_bad_lines='skip', header=None, encoding=\"utf-8\")\n","\n","exp_words = pd.read_csv(\"train/expected.tsv\", sep=\"\\t\", on_bad_lines='skip', header=None, encoding=\"utf-8\")\n"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":684},"executionInfo":{"elapsed":72,"status":"ok","timestamp":1682226919692,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"NzW_UsYNwKGR","outputId":"fd18c049-d4b8-40ef-922c-e062e1178c31"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-be7090c8-ba16-4dff-b789-dd35439a08c1\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003cth\u003e5\u003c/th\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003ctd\u003e4e04702da929c78c52baf09c1851d3ff\u003c/td\u003e\n"," \u003ctd\u003eST\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1919.604110\u003c/td\u003e\n"," \u003ctd\u003e30.475470\u003c/td\u003e\n"," \u003ctd\u003e-90.100911\u003c/td\u003e\n"," \u003ctd\u003ecame fiom the last place t