1 line
60 KiB
Plaintext
1 line
60 KiB
Plaintext
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20920,"status":"ok","timestamp":1682226874288,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"pFXwuw2YtOWN","outputId":"ddb140d5-77c5-41a4-fe8a-7391e5846171"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n"]}],"source":["# Mount Google Drive so the files stored there are reachable under /content/drive.\n","from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":27,"status":"ok","timestamp":1682226874290,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"Cw_AulzZuagH","outputId":"559b5da8-3d20-4330-f7a6-26cf2b8d5255"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/drive/MyDrive\n"]}],"source":["# Explicit %cd with an absolute path: a relative cd fails when the cell is run a second time.\n","%cd /content/drive/MyDrive"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20,"status":"ok","timestamp":1682226874292,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"EvEOrP78ubFh","outputId":"8c8bad1b-1f10-4ff0-f4f4-958e5b66240b"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/drive/MyDrive/challenging-america-word-gap-prediction\n"]}],"source":["# Enter the challenge directory; absolute so the cell does not depend on the current directory.\n","%cd /content/drive/MyDrive/challenging-america-word-gap-prediction/"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":20,"status":"ok","timestamp":1682226874297,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"0AOFd9SzukED"},"outputs":[],"source":["import pandas as pd"]},{"cell_type":"code","execution_count":5,"metadata":{"executionInfo":{"elapsed":45413,"status":"ok","timestamp":1682226919691,"user":{"displayName":"Martyna 
Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"rJ8liwTuujTr"},"outputs":[],"source":["import csv\n","\n","# The texts are raw OCR with stray double-quote characters. Under pandas' default quoting an\n","# unbalanced quote opens a quoted field that swallows the tabs and newlines after it, so rows\n","# can be merged or dropped. QUOTE_NONE reads every character literally.\n","data = pd.read_csv(\"train/in.tsv.xz\", sep=\"\\t\", on_bad_lines='skip', header=None, encoding=\"utf-8\", quoting=csv.QUOTE_NONE)\n","\n","# Gap words, presumably one per input row; parsed with the same quoting rule as the inputs.\n","# NOTE(review): on_bad_lines='skip' is kept, so a row skipped in only one file would still\n","# shift the pairing - confirm len(data) == len(exp_words).\n","exp_words = pd.read_csv(\"train/expected.tsv\", sep=\"\\t\", on_bad_lines='skip', header=None, encoding=\"utf-8\", quoting=csv.QUOTE_NONE)\n"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":684},"executionInfo":{"elapsed":72,"status":"ok","timestamp":1682226919692,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"NzW_UsYNwKGR","outputId":"fd18c049-d4b8-40ef-922c-e062e1178c31"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-be7090c8-ba16-4dff-b789-dd35439a08c1\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003cth\u003e5\u003c/th\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003ctd\u003e4e04702da929c78c52baf09c1851d3ff\u003c/td\u003e\n"," \u003ctd\u003eST\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1919.604110\u003c/td\u003e\n"," \u003ctd\u003e30.475470\u003c/td\u003e\n"," \u003ctd\u003e-90.100911\u003c/td\u003e\n"," \u003ctd\u003ecame 
fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003ctd\u003esaid\\nit's all squash. The best I could get\\ni...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003ctd\u003eb374dadd940510271d9675d3e8caf9d8\u003c/td\u003e\n"," \u003ctd\u003eDAILY ARIZONA SILVER BELT\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1909.097260\u003c/td\u003e\n"," \u003ctd\u003e33.399478\u003c/td\u003e\n"," \u003ctd\u003e-110.870950\u003c/td\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003ctd\u003e\\ninto a proper perspective with those\\nminor ...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003ctd\u003eadb666c426bdc10fd949cb824da6c0d0\u003c/td\u003e\n"," \u003ctd\u003eTHE SAVANNAH MORNING NEWS\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1900.913699\u003c/td\u003e\n"," \u003ctd\u003e32.080926\u003c/td\u003e\n"," \u003ctd\u003e-81.091177\u003c/td\u003e\n"," \u003ctd\u003eThera were in 1771 only aeventy-nine\\n*ub*erlb...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003ctd\u003ebc2c9aa0b77d724311e3c2e12fc61c92\u003c/td\u003e\n"," \u003ctd\u003eCHARLES CITY INTELLIGENCER\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1864.974044\u003c/td\u003e\n"," \u003ctd\u003e43.066361\u003c/td\u003e\n"," \u003ctd\u003e-92.672411\u003c/td\u003e\n"," \u003ctd\u003ewhenever any prize property shall!*' condemn- ...\u003c/td\u003e\n"," \u003ctd\u003ethe ceitihcate of'\\noperate to prevent tfie ma...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003ctd\u003e0f612b991a39c712f0d745835b8b2f0d\u003c/td\u003e\n"," \u003ctd\u003eEVENING STAR\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," 
\u003ctd\u003e1878.478082\u003c/td\u003e\n"," \u003ctd\u003e38.894955\u003c/td\u003e\n"," \u003ctd\u003e-77.036646\u003c/td\u003e\n"," \u003ctd\u003eSA LKOFVALUABLE UNIMPBOV\u0026amp;D RE\\\\L\\nJSIATF. ON T...\u003c/td\u003e\n"," \u003ctd\u003e\\nTerms of sale: One-tblrd, togethor with the ...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e5\u003c/th\u003e\n"," \u003ctd\u003e4c13fb3d2e6eef35fa28e7bae7868d60\u003c/td\u003e\n"," \u003ctd\u003eEDGEFIELD ADVERTISER\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1913.346575\u003c/td\u003e\n"," \u003ctd\u003e33.789577\u003c/td\u003e\n"," \u003ctd\u003e-81.929558\u003c/td\u003e\n"," \u003ctd\u003eGod includes all. and would we not\\ngrieve if ...\u003c/td\u003e\n"," \u003ctd\u003elot of spiritual\\nwaifs all about us. children...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003ctd\u003ea452eadfc3f4a475147728c5f4005429\u003c/td\u003e\n"," \u003ctd\u003eDAILY LOS ANGELES HERALD\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1883.801370\u003c/td\u003e\n"," \u003ctd\u003e34.054935\u003c/td\u003e\n"," \u003ctd\u003e-118.244476\u003c/td\u003e\n"," \u003ctd\u003eThe said action is brought to obtain a decree ...\u003c/td\u003e\n"," \u003ctd\u003ethen to obtain an execution against said Vie\\n...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003ctd\u003eb970ee32372d81f1fd59ab6196e797c9\u003c/td\u003e\n"," \u003ctd\u003eTHE FINDLAY JEFFERSONIAN\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1874.828767\u003c/td\u003e\n"," \u003ctd\u003e41.041387\u003c/td\u003e\n"," \u003ctd\u003e-83.650398\u003c/td\u003e\n"," \u003ctd\u003eparty\" is a useless exhortation to intel-\\nlig...\u003c/td\u003e\n"," \u003ctd\u003ewith all tjie hatred that\\nsurvives the war; a...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," 
\u003ctr\u003e\n"," \u003cth\u003e8\u003c/th\u003e\n"," \u003ctd\u003ed130f899a50db2792c546cc978dc930c\u003c/td\u003e\n"," \u003ctd\u003eBUTLER CITIZEN\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1883.793151\u003c/td\u003e\n"," \u003ctd\u003e40.861021\u003c/td\u003e\n"," \u003ctd\u003e-79.895225\u003c/td\u003e\n"," \u003ctd\u003ehas led me to accept, everything I read\\nwith ...\u003c/td\u003e\n"," \u003ctd\u003ethat the earth has mo-\\ntion. Aday ortwo agoIt...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e9\u003c/th\u003e\n"," \u003ctd\u003e80e56928e09b93529d206708ac905b63\u003c/td\u003e\n"," \u003ctd\u003eFERGUS COUNTY ARGUS\u003c/td\u003e\n"," \u003ctd\u003eChronAm\u003c/td\u003e\n"," \u003ctd\u003e1892.821038\u003c/td\u003e\n"," \u003ctd\u003e47.062473\u003c/td\u003e\n"," \u003ctd\u003e-109.428238\u003c/td\u003e\n"," \u003ctd\u003eThe wool circulars alluded to are\\nthose which...\u003c/td\u003e\n"," \u003ctd\u003eaccuracy, as\\nthey were furnished by him as ch...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003c/tbody\u003e\n","\u003c/table\u003e\n","\u003c/div\u003e\n"," \u003cbutton class=\"colab-df-convert\" onclick=\"convertToInteractive('df-be7090c8-ba16-4dff-b789-dd35439a08c1')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\"\u003e\n"," \n"," \u003csvg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\"\u003e\n"," \u003cpath d=\"M0 0h24v24H0V0z\" fill=\"none\"/\u003e\n"," \u003cpath d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/\u003e\u003cpath d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 
0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/\u003e\n"," \u003c/svg\u003e\n"," \u003c/button\u003e\n"," \n"," \u003cstyle\u003e\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," \u003c/style\u003e\n","\n"," \u003cscript\u003e\n"," const buttonEl =\n"," document.querySelector('#df-be7090c8-ba16-4dff-b789-dd35439a08c1 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-be7090c8-ba16-4dff-b789-dd35439a08c1');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '\u003ca target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb\u003edata table notebook\u003c/a\u003e'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," \u003c/script\u003e\n"," \u003c/div\u003e\n"," \u003c/div\u003e\n"," "],"text/plain":[" 0 1 2 \\\n","0 4e04702da929c78c52baf09c1851d3ff ST ChronAm \n","1 b374dadd940510271d9675d3e8caf9d8 DAILY ARIZONA SILVER BELT ChronAm \n","2 adb666c426bdc10fd949cb824da6c0d0 THE SAVANNAH MORNING NEWS ChronAm \n","3 bc2c9aa0b77d724311e3c2e12fc61c92 CHARLES CITY INTELLIGENCER ChronAm \n","4 0f612b991a39c712f0d745835b8b2f0d EVENING STAR ChronAm \n","5 4c13fb3d2e6eef35fa28e7bae7868d60 EDGEFIELD ADVERTISER ChronAm \n","6 a452eadfc3f4a475147728c5f4005429 DAILY LOS ANGELES HERALD ChronAm \n","7 b970ee32372d81f1fd59ab6196e797c9 THE FINDLAY JEFFERSONIAN ChronAm \n","8 d130f899a50db2792c546cc978dc930c BUTLER CITIZEN ChronAm \n","9 80e56928e09b93529d206708ac905b63 FERGUS COUNTY ARGUS ChronAm \n","\n"," 3 4 5 \\\n","0 1919.604110 30.475470 -90.100911 \n","1 1909.097260 33.399478 -110.870950 \n","2 1900.913699 32.080926 -81.091177 \n","3 1864.974044 43.066361 -92.672411 \n","4 1878.478082 38.894955 -77.036646 \n","5 1913.346575 33.789577 -81.929558 \n","6 1883.801370 34.054935 -118.244476 \n","7 1874.828767 41.041387 -83.650398 \n","8 1883.793151 40.861021 -79.895225 \n","9 1892.821038 47.062473 -109.428238 \n","\n"," 6 \\\n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 Thera were in 1771 only aeventy-nine\\n*ub*erlb... \n","3 whenever any prize property shall!*' condemn- ... \n","4 SA LKOFVALUABLE UNIMPBOV\u0026D RE\\\\L\\nJSIATF. ON T... 
\n","5 God includes all. and would we not\\ngrieve if ... \n","6 The said action is brought to obtain a decree ... \n","7 party\" is a useless exhortation to intel-\\nlig... \n","8 has led me to accept, everything I read\\nwith ... \n","9 The wool circulars alluded to are\\nthose which... \n","\n"," 7 \n","0 said\\nit's all squash. The best I could get\\ni... \n","1 \\ninto a proper perspective with those\\nminor ... \n","2 NaN \n","3 the ceitihcate of'\\noperate to prevent tfie ma... \n","4 \\nTerms of sale: One-tblrd, togethor with the ... \n","5 lot of spiritual\\nwaifs all about us. children... \n","6 then to obtain an execution against said Vie\\n... \n","7 with all tjie hatred that\\nsurvives the war; a... \n","8 that the earth has mo-\\ntion. Aday ortwo agoIt... \n","9 accuracy, as\\nthey were furnished by him as ch... "]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["# Peek at the first ten raw rows.\n","data.head(10)"]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":28,"status":"ok","timestamp":1682226919693,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"upTQ5Po9wOSL"},"outputs":[],"source":["# Keep only columns 6 and 7, the two text columns of the input frame.\n","train_data = data.loc[:, [6, 7]]"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"executionInfo":{"elapsed":28,"status":"ok","timestamp":1682226919694,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"jcWm7qP_wShN","outputId":"4abbe2f6-ca22-4c2c-9bfe-48753d3b66e8"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-3b0a46c9-86ad-46f3-a884-debc98ebff37\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," 
}\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003ctd\u003esaid\\nit's all squash. The best I could get\\ni...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003ctd\u003e\\ninto a proper perspective with those\\nminor ...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003ctd\u003eThera were in 1771 only aeventy-nine\\n*ub*erlb...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003ctd\u003ewhenever any prize property shall!*' condemn- ...\u003c/td\u003e\n"," \u003ctd\u003ethe ceitihcate of'\\noperate to prevent tfie ma...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003ctd\u003eSA LKOFVALUABLE UNIMPBOV\u0026amp;D RE\\\\L\\nJSIATF. ON T...\u003c/td\u003e\n"," \u003ctd\u003e\\nTerms of sale: One-tblrd, togethor with the ...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e5\u003c/th\u003e\n"," \u003ctd\u003eGod includes all. and would we not\\ngrieve if ...\u003c/td\u003e\n"," \u003ctd\u003elot of spiritual\\nwaifs all about us. 
children...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003ctd\u003eThe said action is brought to obtain a decree ...\u003c/td\u003e\n"," \u003ctd\u003ethen to obtain an execution against said Vie\\n...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003ctd\u003eparty\" is a useless exhortation to intel-\\nlig...\u003c/td\u003e\n"," \u003ctd\u003ewith all tjie hatred that\\nsurvives the war; a...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e8\u003c/th\u003e\n"," \u003ctd\u003ehas led me to accept, everything I read\\nwith ...\u003c/td\u003e\n"," \u003ctd\u003ethat the earth has mo-\\ntion. Aday ortwo agoIt...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e9\u003c/th\u003e\n"," \u003ctd\u003eThe wool circulars alluded to are\\nthose which...\u003c/td\u003e\n"," \u003ctd\u003eaccuracy, as\\nthey were furnished by him as ch...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003c/tbody\u003e\n","\u003c/table\u003e\n","\u003c/div\u003e\n"," \u003cbutton class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3b0a46c9-86ad-46f3-a884-debc98ebff37')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\"\u003e\n"," \n"," \u003csvg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\"\u003e\n"," \u003cpath d=\"M0 0h24v24H0V0z\" fill=\"none\"/\u003e\n"," \u003cpath d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/\u003e\u003cpath d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 
1.35L5.41 20z\"/\u003e\n"," \u003c/svg\u003e\n"," \u003c/button\u003e\n"," \n"," \u003cstyle\u003e\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," \u003c/style\u003e\n","\n"," \u003cscript\u003e\n"," const buttonEl =\n"," document.querySelector('#df-3b0a46c9-86ad-46f3-a884-debc98ebff37 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-3b0a46c9-86ad-46f3-a884-debc98ebff37');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '\u003ca target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb\u003edata table notebook\u003c/a\u003e'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," \u003c/script\u003e\n"," \u003c/div\u003e\n"," \u003c/div\u003e\n"," "],"text/plain":[" 6 \\\n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 Thera were in 1771 only aeventy-nine\\n*ub*erlb... \n","3 whenever any prize property shall!*' condemn- ... \n","4 SA LKOFVALUABLE UNIMPBOV\u0026D RE\\\\L\\nJSIATF. ON T... \n","5 God includes all. and would we not\\ngrieve if ... \n","6 The said action is brought to obtain a decree ... \n","7 party\" is a useless exhortation to intel-\\nlig... \n","8 has led me to accept, everything I read\\nwith ... \n","9 The wool circulars alluded to are\\nthose which... \n","\n"," 7 \n","0 said\\nit's all squash. The best I could get\\ni... \n","1 \\ninto a proper perspective with those\\nminor ... \n","2 NaN \n","3 the ceitihcate of'\\noperate to prevent tfie ma... \n","4 \\nTerms of sale: One-tblrd, togethor with the ... \n","5 lot of spiritual\\nwaifs all about us. children... \n","6 then to obtain an execution against said Vie\\n... \n","7 with all tjie hatred that\\nsurvives the war; a... \n","8 that the earth has mo-\\ntion. Aday ortwo agoIt... \n","9 accuracy, as\\nthey were furnished by him as ch... 
"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["train_data[:10]"]},{"cell_type":"code","execution_count":9,"metadata":{"executionInfo":{"elapsed":26,"status":"ok","timestamp":1682226919695,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"OdEm_SBSwXuY"},"outputs":[],"source":["# Build from data rather than from train_data itself, so re-running this cell cannot stack a\n","# second copy of the expected-word column onto an already merged frame.\n","train_data = pd.concat([data[[6, 7]], exp_words], axis=1)"]},{"cell_type":"code","execution_count":10,"metadata":{"executionInfo":{"elapsed":25,"status":"ok","timestamp":1682226919696,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"b1TM741wwYdA"},"outputs":[],"source":["# Replace the positional column labels with readable names. The result is assigned instead of\n","# using inplace=True, which keeps the cell safe to re-run.\n","train_data = train_data.rename(columns={6: 'First Part', 7: 'Second Part', 0: 'Expected word'})"]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"executionInfo":{"elapsed":25,"status":"ok","timestamp":1682226919697,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"W_P2GpVzwbjM","outputId":"6f170a20-fed8-4db1-a479-abb476f5e550"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-6cbd1511-d955-4157-82f6-f2e31a8251e9\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003eFirst Part\u003c/th\u003e\n"," \u003cth\u003eSecond Part\u003c/th\u003e\n"," \u003cth\u003eExpected word\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," 
\u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003ctd\u003esaid\\nit's all squash. The best I could get\\ni...\u003c/td\u003e\n"," \u003ctd\u003elie\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003ctd\u003e\\ninto a proper perspective with those\\nminor ...\u003c/td\u003e\n"," \u003ctd\u003ehimself\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003ctd\u003eThera were in 1771 only aeventy-nine\\n*ub*erlb...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eof\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003ctd\u003ewhenever any prize property shall!*' condemn- ...\u003c/td\u003e\n"," \u003ctd\u003ethe ceitihcate of'\\noperate to prevent tfie ma...\u003c/td\u003e\n"," \u003ctd\u003eably\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003ctd\u003eSA LKOFVALUABLE UNIMPBOV\u0026amp;D RE\\\\L\\nJSIATF. ON T...\u003c/td\u003e\n"," \u003ctd\u003e\\nTerms of sale: One-tblrd, togethor with the ...\u003c/td\u003e\n"," \u003ctd\u003ej\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e5\u003c/th\u003e\n"," \u003ctd\u003eGod includes all. and would we not\\ngrieve if ...\u003c/td\u003e\n"," \u003ctd\u003elot of spiritual\\nwaifs all about us. 
children...\u003c/td\u003e\n"," \u003ctd\u003ehe\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e6\u003c/th\u003e\n"," \u003ctd\u003eThe said action is brought to obtain a decree ...\u003c/td\u003e\n"," \u003ctd\u003ethen to obtain an execution against said Vie\\n...\u003c/td\u003e\n"," \u003ctd\u003egraph\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e7\u003c/th\u003e\n"," \u003ctd\u003eparty\" is a useless exhortation to intel-\\nlig...\u003c/td\u003e\n"," \u003ctd\u003ewith all tjie hatred that\\nsurvives the war; a...\u003c/td\u003e\n"," \u003ctd\u003e011\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e8\u003c/th\u003e\n"," \u003ctd\u003ehas led me to accept, everything I read\\nwith ...\u003c/td\u003e\n"," \u003ctd\u003ethat the earth has mo-\\ntion. Aday ortwo agoIt...\u003c/td\u003e\n"," \u003ctd\u003eseparately.\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e9\u003c/th\u003e\n"," \u003ctd\u003eThe wool circulars alluded to are\\nthose which...\u003c/td\u003e\n"," \u003ctd\u003eaccuracy, as\\nthey were furnished by him as ch...\u003c/td\u003e\n"," \u003ctd\u003ea\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003c/tbody\u003e\n","\u003c/table\u003e\n","\u003c/div\u003e\n"," \u003cbutton class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6cbd1511-d955-4157-82f6-f2e31a8251e9')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\"\u003e\n"," \n"," \u003csvg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\"\u003e\n"," \u003cpath d=\"M0 0h24v24H0V0z\" fill=\"none\"/\u003e\n"," \u003cpath d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/\u003e\u003cpath d=\"M17.41 
7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/\u003e\n"," \u003c/svg\u003e\n"," \u003c/button\u003e\n"," \n"," \u003cstyle\u003e\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," \u003c/style\u003e\n","\n"," \u003cscript\u003e\n"," const buttonEl =\n"," document.querySelector('#df-6cbd1511-d955-4157-82f6-f2e31a8251e9 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-6cbd1511-d955-4157-82f6-f2e31a8251e9');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? 
Visit the ' +\n"," '\u003ca target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb\u003edata table notebook\u003c/a\u003e'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," \u003c/script\u003e\n"," \u003c/div\u003e\n"," \u003c/div\u003e\n"," "],"text/plain":[" First Part \\\n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 Thera were in 1771 only aeventy-nine\\n*ub*erlb... \n","3 whenever any prize property shall!*' condemn- ... \n","4 SA LKOFVALUABLE UNIMPBOV\u0026D RE\\\\L\\nJSIATF. ON T... \n","5 God includes all. and would we not\\ngrieve if ... \n","6 The said action is brought to obtain a decree ... \n","7 party\" is a useless exhortation to intel-\\nlig... \n","8 has led me to accept, everything I read\\nwith ... \n","9 The wool circulars alluded to are\\nthose which... \n","\n"," Second Part Expected word \n","0 said\\nit's all squash. The best I could get\\ni... lie \n","1 \\ninto a proper perspective with those\\nminor ... himself \n","2 NaN of \n","3 the ceitihcate of'\\noperate to prevent tfie ma... ably \n","4 \\nTerms of sale: One-tblrd, togethor with the ... j \n","5 lot of spiritual\\nwaifs all about us. children... he \n","6 then to obtain an execution against said Vie\\n... graph \n","7 with all tjie hatred that\\nsurvives the war; a... 011 \n","8 that the earth has mo-\\ntion. Aday ortwo agoIt... separately. \n","9 accuracy, as\\nthey were furnished by him as ch... 
a "]},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":["train_data[:10]"]},{"cell_type":"code","execution_count":12,"metadata":{"executionInfo":{"elapsed":1465,"status":"ok","timestamp":1682226921141,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"6Zfbmp-IxKUH"},"outputs":[],"source":["# Rebuild each full passage as left context + gap word + right context.\n","# sep=' ' keeps the gap word from fusing with its neighbours, and na_rep='' stops one missing\n","# part (row 2 has no Second Part) from turning the whole passage into NaN.\n","train_data['Concatenated'] = train_data['First Part'].str.cat(\n","    [train_data['Expected word'], train_data['Second Part']], sep=' ', na_rep=''\n",")"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":222},"executionInfo":{"elapsed":19,"status":"ok","timestamp":1682226921143,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"ChCFrMCaxgR0","outputId":"0b83dcd1-fabb-4387-ace7-fc8b65853f61"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-05b83a1f-4d59-4594-bf03-7c78347a2c9e\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003eFirst Part\u003c/th\u003e\n"," \u003cth\u003eSecond Part\u003c/th\u003e\n"," \u003cth\u003eExpected word\u003c/th\u003e\n"," \u003cth\u003eConcatenated\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003ctd\u003esaid\\nit's all squash. 
The best I could get\\ni...\u003c/td\u003e\n"," \u003ctd\u003elie\u003c/td\u003e\n"," \u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003ctd\u003e\\ninto a proper perspective with those\\nminor ...\u003c/td\u003e\n"," \u003ctd\u003ehimself\u003c/td\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003ctd\u003eThera were in 1771 only aeventy-nine\\n*ub*erlb...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eof\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003c/tbody\u003e\n","\u003c/table\u003e\n","\u003c/div\u003e\n"," \u003cbutton class=\"colab-df-convert\" onclick=\"convertToInteractive('df-05b83a1f-4d59-4594-bf03-7c78347a2c9e')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\"\u003e\n"," \n"," \u003csvg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\"\u003e\n"," \u003cpath d=\"M0 0h24v24H0V0z\" fill=\"none\"/\u003e\n"," \u003cpath d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/\u003e\u003cpath d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/\u003e\n"," \u003c/svg\u003e\n"," \u003c/button\u003e\n"," \n"," \u003cstyle\u003e\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," 
.colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," \u003c/style\u003e\n","\n"," \u003cscript\u003e\n"," const buttonEl =\n"," document.querySelector('#df-05b83a1f-4d59-4594-bf03-7c78347a2c9e button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-05b83a1f-4d59-4594-bf03-7c78347a2c9e');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '\u003ca target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb\u003edata table notebook\u003c/a\u003e'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," \u003c/script\u003e\n"," \u003c/div\u003e\n"," \u003c/div\u003e\n"," "],"text/plain":[" First Part \\\n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. 
BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 Thera were in 1771 only aeventy-nine\\n*ub*erlb... \n","\n"," Second Part Expected word \\\n","0 said\\nit's all squash. The best I could get\\ni... lie \n","1 \\ninto a proper perspective with those\\nminor ... himself \n","2 NaN of \n","\n"," Concatenated \n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 NaN "]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["train_data[:3]"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":623},"executionInfo":{"elapsed":5412,"status":"ok","timestamp":1682226926542,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"},"user_tz":-120},"id":"lQQHHALRxiHj","outputId":"1f9e7539-0345-49d6-90ca-3e4b000b3cb0"},"outputs":[{"data":{"text/html":["\n"," \u003cdiv id=\"df-93a1a65c-61e6-4c01-9954-970e3260ddbe\"\u003e\n"," \u003cdiv class=\"colab-df-container\"\u003e\n"," \u003cdiv\u003e\n","\u003cstyle scoped\u003e\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","\u003c/style\u003e\n","\u003ctable border=\"1\" class=\"dataframe\"\u003e\n"," \u003cthead\u003e\n"," \u003ctr style=\"text-align: right;\"\u003e\n"," \u003cth\u003e\u003c/th\u003e\n"," \u003cth\u003eFirst Part\u003c/th\u003e\n"," \u003cth\u003eSecond Part\u003c/th\u003e\n"," \u003cth\u003eExpected word\u003c/th\u003e\n"," \u003cth\u003eConcatenated\u003c/th\u003e\n"," \u003c/tr\u003e\n"," \u003c/thead\u003e\n"," \u003ctbody\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e0\u003c/th\u003e\n"," \u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003ctd\u003esaid\\nit's all squash. 
The best I could get\\ni...\u003c/td\u003e\n"," \u003ctd\u003elie\u003c/td\u003e\n"," \u003ctd\u003ecame fiom the last place to this\\nplace, and t...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e1\u003c/th\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003ctd\u003e\\ninto a proper perspective with those\\nminor ...\u003c/td\u003e\n"," \u003ctd\u003ehimself\u003c/td\u003e\n"," \u003ctd\u003eMB. BOOT'S POLITICAL OBEED\\nAttempt to imagine...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e2\u003c/th\u003e\n"," \u003ctd\u003eThera were in 1771 only aeventy-nine\\n*ub*erlb...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eof\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e3\u003c/th\u003e\n"," \u003ctd\u003ewhenever any prize property shall!*' condemn- ...\u003c/td\u003e\n"," \u003ctd\u003ethe ceitihcate of'\\noperate to prevent tfie ma...\u003c/td\u003e\n"," \u003ctd\u003eably\u003c/td\u003e\n"," \u003ctd\u003ewhenever any prize property shall!*' condemn- ...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e4\u003c/th\u003e\n"," \u003ctd\u003eSA LKOFVALUABLE UNIMPBOV\u0026amp;D RE\\\\L\\nJSIATF. ON T...\u003c/td\u003e\n"," \u003ctd\u003e\\nTerms of sale: One-tblrd, togethor with the ...\u003c/td\u003e\n"," \u003ctd\u003ej\u003c/td\u003e\n"," \u003ctd\u003eSA LKOFVALUABLE UNIMPBOV\u0026amp;D RE\\\\L\\nJSIATF. 
ON T...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e...\u003c/th\u003e\n"," \u003ctd\u003e...\u003c/td\u003e\n"," \u003ctd\u003e...\u003c/td\u003e\n"," \u003ctd\u003e...\u003c/td\u003e\n"," \u003ctd\u003e...\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e428512\u003c/th\u003e\n"," \u003ctd\u003eSam Clendenin bad a fancy for Ui«\\nscience of ...\u003c/td\u003e\n"," \u003ctd\u003e\\nSam was arrested.\\nThe case excited a great ...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e428513\u003c/th\u003e\n"," \u003ctd\u003eWita.htt halting the party ware dilven to the ...\u003c/td\u003e\n"," \u003ctd\u003ethrough the alnp the »Uitors laapeeeed tia.»\\n...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e428514\u003c/th\u003e\n"," \u003ctd\u003eIt was the last thing that either of\\nthem exp...\u003c/td\u003e\n"," \u003ctd\u003eAgua Negra across the line.\\nIt was a grim pla...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e428515\u003c/th\u003e\n"," \u003ctd\u003esettlement with the department.\\nIt is also sh...\u003c/td\u003e\n"," \u003ctd\u003e\\na note of Wood, Dialogue fc Co., for\\nc27,im...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003ctr\u003e\n"," \u003cth\u003e428516\u003c/th\u003e\n"," \u003ctd\u003eFlour quotations—low extras at 1 R0®2 50;\\ncit...\u003c/td\u003e\n"," \u003ctd\u003e3214c;do White at 3614c: Mixed Western at\\n331...\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003ctd\u003eNaN\u003c/td\u003e\n"," \u003c/tr\u003e\n"," \u003c/tbody\u003e\n","\u003c/table\u003e\n","\u003cp\u003e428517 rows × 4 
columns\u003c/p\u003e\n","\u003c/div\u003e\n"," \u003cbutton class=\"colab-df-convert\" onclick=\"convertToInteractive('df-93a1a65c-61e6-4c01-9954-970e3260ddbe')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\"\u003e\n"," \n"," \u003csvg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\"\u003e\n"," \u003cpath d=\"M0 0h24v24H0V0z\" fill=\"none\"/\u003e\n"," \u003cpath d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/\u003e\u003cpath d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/\u003e\n"," \u003c/svg\u003e\n"," \u003c/button\u003e\n"," \n"," \u003cstyle\u003e\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," \u003c/style\u003e\n","\n"," \u003cscript\u003e\n"," const buttonEl =\n"," 
document.querySelector('#df-93a1a65c-61e6-4c01-9954-970e3260ddbe button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-93a1a65c-61e6-4c01-9954-970e3260ddbe');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '\u003ca target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb\u003edata table notebook\u003c/a\u003e'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," \u003c/script\u003e\n"," \u003c/div\u003e\n"," \u003c/div\u003e\n"," "],"text/plain":[" First Part \\\n","0 came fiom the last place to this\\nplace, and t... \n","1 MB. BOOT'S POLITICAL OBEED\\nAttempt to imagine... \n","2 Thera were in 1771 only aeventy-nine\\n*ub*erlb... \n","3 whenever any prize property shall!*' condemn- ... \n","4 SA LKOFVALUABLE UNIMPBOV\u0026D RE\\\\L\\nJSIATF. ON T... \n","... ... \n","428512 Sam Clendenin bad a fancy for Ui«\\nscience of ... \n","428513 Wita.htt halting the party ware dilven to the ... \n","428514 It was the last thing that either of\\nthem exp... \n","428515 settlement with the department.\\nIt is also sh... \n","428516 Flour quotations—low extras at 1 R0®2 50;\\ncit... \n","\n"," Second Part Expected word \\\n","0 said\\nit's all squash. The best I could get\\ni... lie \n","1 \\ninto a proper perspective with those\\nminor ... himself \n","2 NaN of \n","3 the ceitihcate of'\\noperate to prevent tfie ma... 
import regex as re
import nltk
from collections import Counter, defaultdict

# Tokenizer data required by nltk.word_tokenize.
nltk.download('punkt')

# Normalise line-break markers in every text column.
# The original call `train_data.replace('\n', '', regex=True)` had no effect: its
# result was discarded, and the pattern matched a real newline character while the
# text appears to carry the two-character marker backslash + "n" (the patterns used
# by the cleaning function further down target that marker).
#   * "-\n"  -> ""   : a word hyphenated across a line break is re-joined
#   * "\n"   -> " "  : any other line break separates two words
# NaN cells are left untouched by DataFrame.replace.
train_data = (
    train_data
    .replace(r'-\\n', '', regex=True)
    .replace(r'\\n|\n', ' ', regex=True)
)

# Scratch model from an earlier experiment; kept because later cells may still
# reference the name.  (The commented-out experiment that filled it was removed.)
model2 = defaultdict(lambda: defaultdict(lambda: 0))
import lzma
import unicodedata


class _PunctuationEraser(dict):
    """Lazily filled ``str.translate`` table that deletes Unicode punctuation.

    Code points are classified on first use and cached, so no table for the
    whole Unicode range has to be built up front.
    """

    def __missing__(self, codepoint):
        # General categories starting with "P" are the punctuation classes;
        # mapping to None makes str.translate drop the character.
        target = None if unicodedata.category(chr(codepoint)).startswith('P') else codepoint
        self[codepoint] = target
        return target


_ERASE_PUNCTUATION = _PunctuationEraser()


def strip(text):
    """Normalise a raw passage before tokenisation.

    Steps, in order: lower-case and trim; unify the typographic apostrophe;
    re-join words hyphenated across a line-break marker; turn remaining
    line-break markers into spaces; drop possessive ``'s``; delete all Unicode
    punctuation; replace runs of digits, tabs, newlines, ``^`` and ``$`` with
    a single space.

    The line-break marker is the literal two-character sequence backslash + "n".
    It used to be handled only for a handful of hard-coded phrases (some of
    which could never match because the text was already lower-cased);
    everywhere else the backslash was removed as punctuation and the leftover
    "n" fused the two neighbouring words.

    Args:
        text: any object; it is converted with ``str()`` first.

    Returns:
        The cleaned text. Inner and trailing whitespace is not collapsed.
    """
    txt = str(text).lower().strip()
    txt = txt.replace("’", "'")
    txt = txt.replace("-\\n", "")   # "neigh-\nbors" -> "neighbors"
    txt = txt.replace("\\n", " ")   # "this\nplace"  -> "this place"
    # Only a possessive/contracted 's at the end of a word; a bare "'s" pattern
    # also ate the start of quoted words ("'said" -> "aid").
    # NOTE: the patterns below use syntax shared by stdlib `re` and the `regex`
    # package, whichever the name `re` is bound to.
    txt = re.sub(r"'s\b", "", txt)
    txt = txt.translate(_ERASE_PUNCTUATION)
    # Brackets, &, %, *, #, @ ... are punctuation and already gone at this point.
    txt = re.sub(r"[\^$\t\n0-9]+", " ", txt)
    return txt


# (left word, right word) -> {middle word: probability}; filled by train().
model = defaultdict(lambda: defaultdict(lambda: 0))


def train(data, m, alpha=0.25):
    """Fit the gap model: P(middle word | word before, word after).

    Every cleaned, tokenised passage contributes its trigrams (w1, w2, w3) as
    one observation of ``w2`` in the context ``(w1, w3)``.  Counts are turned
    into probabilities with additive smoothing over the candidates seen in that
    context plus one bucket for unseen words:

        P(w | ctx) = (count(ctx, w) + alpha) / (count(ctx) + alpha * (k + 1))

    where ``k`` is the number of distinct candidates of the context.  The
    previous denominator added ``len(word_2)`` - the number of characters of
    the candidate - so it changed from word to word and the values did not
    form a distribution.

    Counting happens in a local structure and only final probabilities are
    written to ``m``, so running the function again does not re-normalise
    already normalised values.  Contexts present in ``data`` overwrite the
    same contexts in ``m``.

    Args:
        data: DataFrame with a ``'Concatenated'`` text column; missing values
            are skipped.
        m: mapping that receives ``(w1, w3) -> {w2: probability}``.
        alpha: smoothing constant.

    Returns:
        ``m`` itself.
    """
    counts = defaultdict(Counter)
    for passage in data['Concatenated'].dropna():
        words = nltk.word_tokenize(strip(passage))
        for word_1, word_2, word_3 in zip(words, words[1:], words[2:]):
            counts[(word_1, word_3)][word_2] += 1

    for context, middle_counts in counts.items():
        denominator = sum(middle_counts.values()) + alpha * (len(middle_counts) + 1)
        m[context] = {
            word_2: (count + alpha) / denominator
            for word_2, count in middle_counts.items()
        }
    return m


def base_prob():
    """Fallback prediction used when the context gives no information."""
    return 'the:0.02 a:0.013 to:0.01 be:0.01 and:0.01 :0.937'


def predict_words(before, after):
    """Format the five most likely gap words for the context (before, after).

    Returns a line of the form ``word:prob word:prob ... :rest`` where ``rest``
    is the probability left for all other words (never below 0.01).  Unknown
    contexts yield ``base_prob()``.  The lookup uses ``.get`` so that unseen
    contexts are not inserted into the global ``model``.
    """
    candidates = model.get((before, after))
    if not candidates:
        return base_prob()

    pieces = []
    covered = 0.0
    for word, probability in Counter(candidates).most_common(5):
        covered += probability
        pieces.append(f'{word}:{probability}')
    if covered == 0.0:
        return base_prob()
    pieces.append(f':{max(1 - covered, 0.01)}')
    return ' '.join(pieces)


def predict_file(file):
    """Write ``{file}/out.tsv`` with one prediction per line of ``{file}/in.tsv.xz``.

    The input is read line by line and split on tabs instead of going through
    a CSV parser with ``on_bad_lines='skip'``: a skipped row would silently
    shift every following prediction relative to the input.  Rows with missing
    context columns get the fallback prediction.

    Args:
        file: directory holding ``in.tsv.xz``; ``out.tsv`` is created there.
    """
    # newline='\n' keeps a stray carriage return inside a passage from being
    # treated as a row boundary.
    with lzma.open(f'{file}/in.tsv.xz', 'rt', encoding='utf-8', newline='\n') as file_in, \
            open(f'{file}/out.tsv', 'w', encoding='utf-8', newline='\n') as file_out:
        for line in file_in:
            fields = line.rstrip('\n').split('\t')
            # Columns 6 and 7 hold the text before and after the gap.
            left = fields[6] if len(fields) > 6 else ''
            right = fields[7] if len(fields) > 7 else ''
            before = nltk.word_tokenize(strip(left))
            after = nltk.word_tokenize(strip(right))
            # Contexts shorter than three tokens are treated as unreliable.
            if len(before) < 3 or len(after) < 3:
                prediction = base_prob()
            else:
                prediction = predict_words(before[-1], after[0])
            file_out.write(prediction + '\n')


TRAIN_ROWS = 100000  # number of leading rows of train_data used for fitting

# Guarded so the definitions above can be imported without starting the long
# run; in the notebook kernel __name__ is "__main__", so Run All still executes
# it.  Being nested, the train() call no longer dumps the whole model as output.
if __name__ == "__main__":
    train(train_data[:TRAIN_ROWS], model)
    for split_name in ('dev-0', 'test-A'):
        predict_file(split_name)