challenging-america-word-ga.../Copy of Untitled0.ipynb

1 line
90 KiB
Plaintext
Raw Normal View History

2023-05-10 00:37:23 +02:00
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyO3dKRYVdORr6E3c9yw52oD"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","gpuClass":"standard"},"cells":[{"cell_type":"code","execution_count":27,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fey0MM6ujDTv","executionInfo":{"status":"ok","timestamp":1680630175779,"user_tz":-120,"elapsed":2592,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}},"outputId":"d10740fa-6e05-49cd-a77e-f4fa5340bcee"},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","source":["cd drive/MyDrive"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cykvdVL5jbTZ","executionInfo":{"status":"ok","timestamp":1680630175780,"user_tz":-120,"elapsed":52,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}},"outputId":"9e2f3d3a-1e23-44d1-a928-36516fb497a6"},"execution_count":28,"outputs":[{"output_type":"stream","name":"stdout","text":["[Errno 2] No such file or directory: 'drive/MyDrive'\n","/content/drive/MyDrive/challenging-america-word-gap-prediction\n"]}]},{"cell_type":"code","source":["cd challenging-america-word-gap-prediction/"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"01lVy22fjeik","executionInfo":{"status":"ok","timestamp":1680630175781,"user_tz":-120,"elapsed":44,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}},"outputId":"8310d370-e4f6-4c0b-d1f3-c74cc12ccbdd"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["[Errno 2] No such file or directory: 'challenging-america-word-gap-prediction/'\n","/content/drive/MyDrive/challenging-america-word-gap-prediction\n"]}]},{"cell_type":"code","source":["! pip install lmza"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"yZ6TVjdIj2Qd","executionInfo":{"status":"ok","timestamp":1680630177399,"user_tz":-120,"elapsed":1654,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}},"outputId":"df129bc0-1d39-4cf4-c13d-20c35949e638"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stdout","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","\u001b[31mERROR: Could not find a version that satisfies the requirement lmza (from versions: none)\u001b[0m\u001b[31m\n","\u001b[0m\u001b[31mERROR: No matching distribution found for lmza\u001b[0m\u001b[31m\n","\u001b[0m"]}]},{"cell_type":"code","source":["from collections import Counter"],"metadata":{"id":"PY_GLjeIfA5i","executionInfo":{"status":"ok","timestamp":1680630177400,"user_tz":-120,"elapsed":20,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["import lzma"],"metadata":{"id":"adTwEZuPjujM","executionInfo":{"status":"ok","timestamp":1680630177402,"user_tz":-120,"elapsed":19,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}}},"execution_count":32,"outputs":[]},{"cell_type":"code","source":["import pickle"],"metadata":{"id":"K7TshO9We-UH","executionInfo":{"status":"ok","timestamp":1680630177403,"user_tz":-120,"elapsed":19,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}}},"execution_count":33,"outputs":[]},{"cell_type":"code","source":["rowcount=0\n","for row in lzma.open(\"test-A/in.tsv.xz\"):\n"," rowcount+= 1\n"," #printing the result\n","print(\"Number of lines present:-\", rowcount)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PhryEzN5juLo","executionInfo":{"status":"ok","timestamp":1680633539830,"user_tz":-120,"elapsed":448,"user":{"displayName":"Martyna Drumińska","userId":"13361003509289187965"}},"outputId":"efe43933-3de2-4d2e-81