diff --git a/.gitignore b/.gitignore index fad0853..e71e63c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +tmp +*.arpa .ipynb_checkpoints* *~ *.swp diff --git a/model.arpa b/model.arpa deleted file mode 100644 index e69de29..0000000 diff --git a/run.ipynb b/run.ipynb index c9b8b66..e2480a5 100644 --- a/run.ipynb +++ b/run.ipynb @@ -14,7 +14,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "5bf0e02b", + "id": "032ba328", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "98ebf07f", + "id": "e0d94073", "metadata": {}, "outputs": [], "source": [ @@ -34,7 +34,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "42cb7bb1", + "id": "7c055510", "metadata": {}, "outputs": [], "source": [ @@ -44,7 +44,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "e6e0480e", + "id": "bd81e581", "metadata": {}, "outputs": [], "source": [ @@ -54,7 +54,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "464dc043", + "id": "0c4a5486", "metadata": {}, "outputs": [], "source": [ @@ -63,8 +63,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "f5115f59", + "execution_count": 7, + "id": "aec319cd", "metadata": {}, "outputs": [], "source": [ @@ -73,8 +73,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "25585b08", + "execution_count": 8, + "id": "9b794391", "metadata": {}, "outputs": [ { @@ -94,7 +94,7 @@ "Name: 607, Length: 432022, dtype: object" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -105,8 +105,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "325a9592", + "execution_count": 15, + "id": "f21d9139", "metadata": {}, "outputs": [], "source": [ @@ -117,8 +117,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "08888fa3", + "execution_count": 10, + "id": "362a6b83", "metadata": {}, "outputs": [ { @@ -130,7 +130,7 @@ "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n", "************************/home/me/kenlm/lm/builder/corpus_count.cc:179 in void lm::builder::{anonymous}::ComplainDisallowed(StringPiece, lm::WarningAction&) threw FormatLoadException.\n", "Special word is not allowed in the corpus. I plan to support models containing in the future. Pass --skip_symbols to convert these symbols to whitespace.\n", - "/bin/bash: linia 1: 3982 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n" + "/bin/bash: linia 1: 5055 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n" ] } ], @@ -141,8 +141,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "42a8d737", + "execution_count": 11, + "id": "456fa286", "metadata": {}, "outputs": [], "source": [ @@ -151,432 +151,114 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "311c90de", + "execution_count": 14, + "id": "3eaaf27b", "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Collecting kenlm\n", - " Downloading kenlm-0.tar.gz (1.4 MB)\n", - " |████████████████████████████████| 1.4 MB 610 kB/s \n", - "\u001b[?25hBuilding wheels for collected packages: kenlm\n", - " Building wheel for kenlm (setup.py) ... \u001b[?25lerror\n", - "\u001b[31m ERROR: Command errored out with exit status 1:\n", - " command: /usr/bin/python -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"'; __file__='\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"';f=getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__);code=f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' bdist_wheel -d /tmp/pip-wheel-s72u5291\n", - " cwd: /tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/\n", - " Complete output (380 lines):\n", - " running bdist_wheel\n", - " running build\n", - " running build_ext\n", - " building 'kenlm' extension\n", - " creating build/temp.linux-x86_64-3.10\n", - " creating build/temp.linux-x86_64-3.10/lm\n", - " creating build/temp.linux-x86_64-3.10/python\n", - " creating build/temp.linux-x86_64-3.10/util\n", - " creating build/temp.linux-x86_64-3.10/util/double-conversion\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/bhiksha.cc -o build/temp.linux-x86_64-3.10/lm/bhiksha.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/binary_format.cc -o build/temp.linux-x86_64-3.10/lm/binary_format.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " lm/binary_format.cc: In member function ‘void lm::ngram::BinaryFormat::FinishFile(const lm::ngram::Config&, lm::ngram::ModelType, unsigned int, const std::vector&)’:\n", - " lm/binary_format.cc:261:9: warning: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘struct lm::ngram::Parameters’ with no trivial copy-assignment; use assignment or value-initialization instead [-Wclass-memaccess]\n", - " 261 | memset(¶ms, 0, sizeof(Parameters));\n", - " | ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " In file included from lm/binary_format.cc:1:\n", - " ./lm/binary_format.hh:42:8: note: ‘struct lm::ngram::Parameters’ declared here\n", - " 42 | struct Parameters {\n", - " | ^~~~~~~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/config.cc -o build/temp.linux-x86_64-3.10/lm/config.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/lm_exception.cc -o build/temp.linux-x86_64-3.10/lm/lm_exception.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/model.cc -o build/temp.linux-x86_64-3.10/lm/model.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from ./lm/model.hh:13,\n", - " from lm/model.cc:1:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/quantize.cc -o build/temp.linux-x86_64-3.10/lm/quantize.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/read_arpa.cc -o build/temp.linux-x86_64-3.10/lm/read_arpa.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_hashed.cc -o build/temp.linux-x86_64-3.10/lm/search_hashed.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from ./lm/model.hh:13,\n", - " from lm/search_hashed.cc:6:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_trie.cc -o build/temp.linux-x86_64-3.10/lm/search_trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from lm/search_trie.cc:12:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/sizes.cc -o build/temp.linux-x86_64-3.10/lm/sizes.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from ./lm/model.hh:13,\n", - " from lm/sizes.cc:2:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie.cc -o build/temp.linux-x86_64-3.10/lm/trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie_sort.cc -o build/temp.linux-x86_64-3.10/lm/trie_sort.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from lm/trie_sort.cc:6:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/value_build.cc -o build/temp.linux-x86_64-3.10/lm/value_build.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from ./lm/model.hh:13,\n", - " from lm/value_build.cc:3:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/virtual_interface.cc -o build/temp.linux-x86_64-3.10/lm/virtual_interface.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/vocab.cc -o build/temp.linux-x86_64-3.10/lm/vocab.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from lm/vocab.cc:1:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " lm/vocab.cc:285:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 285 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " lm/vocab.cc:297:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 297 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c python/kenlm.cpp -o build/temp.linux-x86_64-3.10/python/kenlm.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n", - " In file included from ./lm/model.hh:13,\n", - " from python/kenlm.cpp:253:\n", - " ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n", - " | ^~~~~\n", - " ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n", - " 213 | template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n", - " | ^~~~~\n", - " In file included from /usr/include/python3.10/Python.h:74,\n", - " from python/kenlm.cpp:16:\n", - " python/kenlm.cpp: In function ‘void __pyx_tp_dealloc_5kenlm_Model(PyObject*)’:\n", - " /usr/include/python3.10/object.h:133:33: error: lvalue required as increment operand\n", - " 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n", - " | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:4398:7: note: in expansion of macro ‘Py_REFCNT’\n", - " 4398 | ++Py_REFCNT(o);\n", - " | ^~~~~~~~~\n", - " /usr/include/python3.10/object.h:133:33: error: lvalue required as decrement operand\n", - " 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n", - " | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:4400:7: note: in expansion of macro ‘Py_REFCNT’\n", - " 4400 | --Py_REFCNT(o);\n", - " | ^~~~~~~~~\n", - " python/kenlm.cpp: In function ‘PyObject* PyInit_kenlm()’:\n", - " python/kenlm.cpp:4876:37: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n", - " 4876 | __pyx_type_5kenlm_FullScoreReturn.tp_print = 0;\n", - " | ^~~~~~~~\n", - " python/kenlm.cpp:4880:27: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n", - " 4880 | __pyx_type_5kenlm_State.tp_print = 0;\n", - " | ^~~~~~~~\n", - " python/kenlm.cpp:4884:28: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n", - " 4884 | __pyx_type_5kenlm_Config.tp_print = 0;\n", - " | ^~~~~~~~\n", - " python/kenlm.cpp:4888:27: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n", - " 4888 | __pyx_type_5kenlm_Model.tp_print = 0;\n", - " | ^~~~~~~~\n", - " python/kenlm.cpp:4902:53: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n", - " 4902 | __pyx_type_5kenlm___pyx_scope_struct__full_scores.tp_print = 0;\n", - " | ^~~~~~~~\n", - " In file included from /usr/include/python3.10/unicodeobject.h:1046,\n", - " from /usr/include/python3.10/Python.h:83,\n", - " from python/kenlm.cpp:16:\n", - " python/kenlm.cpp: In function ‘int __Pyx_ParseOptionalKeywords(PyObject*, PyObject***, PyObject*, PyObject**, Py_ssize_t, const char*)’:\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 261 | PyUnicode_WSTR_LENGTH(op) : \\\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n", - " | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n", - " 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n", - " | ^~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 264 | PyUnicode_WSTR_LENGTH(op)))\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 261 | PyUnicode_WSTR_LENGTH(op) : \\\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n", - " | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n", - " 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n", - " | ^~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 264 | PyUnicode_WSTR_LENGTH(op)))\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 261 | PyUnicode_WSTR_LENGTH(op) : \\\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n", - " | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n", - " 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n", - " | ^~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 264 | PyUnicode_WSTR_LENGTH(op)))\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 261 | PyUnicode_WSTR_LENGTH(op) : \\\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n", - " | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n", - " 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n", - " | ^~~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n", - " 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n", - " | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n", - " 264 | PyUnicode_WSTR_LENGTH(op)))\n", - " | ^~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n", - " 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n", - " | ^~~~~~~~~~~~~~~~~~\n", - " /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n", - " 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n", - " | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n", - " python/kenlm.cpp: In function ‘void __Pyx_ExceptionSave(PyObject**, PyObject**, PyObject**)’:\n", - " python/kenlm.cpp:5583:21: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 5583 | *type = tstate->exc_type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:5584:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 5584 | *value = tstate->exc_value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:5585:19: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 5585 | *tb = tstate->exc_traceback;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp: In function ‘void __Pyx_ExceptionReset(PyObject*, PyObject*, PyObject*)’:\n", - " python/kenlm.cpp:5597:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 5597 | tmp_type = tstate->exc_type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:5598:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 5598 | tmp_value = tstate->exc_value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:5599:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 5599 | tmp_tb = tstate->exc_traceback;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp:5600:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 5600 | tstate->exc_type = type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:5601:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 5601 | tstate->exc_value = value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:5602:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 5602 | tstate->exc_traceback = tb;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp: In function ‘int __Pyx_GetException(PyObject**, PyObject**, PyObject**)’:\n", - " python/kenlm.cpp:5645:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 5645 | tmp_type = tstate->exc_type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:5646:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 5646 | tmp_value = tstate->exc_value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:5647:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 5647 | tmp_tb = tstate->exc_traceback;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp:5648:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 5648 | tstate->exc_type = local_type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:5649:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 5649 | tstate->exc_value = local_value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:5650:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 5650 | tstate->exc_traceback = local_tb;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp: In function ‘void __Pyx_ExceptionSwap(PyObject**, PyObject**, PyObject**)’:\n", - " python/kenlm.cpp:6376:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 6376 | tmp_type = tstate->exc_type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:6377:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 6377 | tmp_value = tstate->exc_value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:6378:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 6378 | tmp_tb = tstate->exc_traceback;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " python/kenlm.cpp:6379:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n", - " 6379 | tstate->exc_type = *type;\n", - " | ^~~~~~~~\n", - " | curexc_type\n", - " python/kenlm.cpp:6380:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n", - " 6380 | tstate->exc_value = *value;\n", - " | ^~~~~~~~~\n", - " | curexc_value\n", - " python/kenlm.cpp:6381:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n", - " 6381 | tstate->exc_traceback = *tb;\n", - " | ^~~~~~~~~~~~~\n", - " | curexc_traceback\n", - " error: command '/usr/bin/gcc' failed with exit code 1\n", - " ----------------------------------------\u001b[0m\n", - "\u001b[31m ERROR: Failed building wheel for kenlm\u001b[0m\n", - "\u001b[?25h Running setup.py clean for kenlm\n" + "Loading the LM will be faster if you build a binary file.\n", + "Reading /home/me/challenging-america-word-gap-prediction-kenlm/model.arpa\n", + "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Failed to build kenlm\n", - "Installing collected packages: kenlm\n", - " Running setup.py install for kenlm ... \u001b[?25l-" + "ename": "OSError", + "evalue": "Cannot read model './model.arpa' (End of file Byte: 0)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32mkenlm.pyx:139\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: End of file Byte: 0", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [14]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkenlm\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mkenlm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mModel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./model.arpa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32mkenlm.pyx:142\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mOSError\u001b[0m: Cannot read model './model.arpa' (End of file Byte: 0)" ] } ], "source": [ - "!pip install kenlm" + "import kenlm\n", + "model = kenlm.Model(\"./model.arpa\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "a849ad70", + "id": "b3a22dcd", "metadata": {}, "outputs": [], "source": [ - "import kenlm\n", - "model = kenlm.Model(\"\")" + "def predict(before, after):\n", + " result = ''\n", + " prob = 0.0\n", + " best = []\n", + " for word in english_words_alpha_set:\n", + " text = ' '.join([before, word, after])\n", + " text_score = model.score(text, bos=False, eos=False)\n", + " if len(best) < 12:\n", + " best.append((word, text_score))\n", + " else:\n", + " is_better = False\n", + " worst_score = None\n", + " for score in best:\n", + " if not worst_score:\n", + " worst_score = score\n", + " else:\n", + " if worst_score[1] > score[1]:\n", + " worst_score = score\n", + " if worst_score[1] < text_score:\n", + " best.remove(worst_score)\n", + " best.append((word, text_score))\n", + " probs = sorted(best, key=lambda tup: tup[1], reverse=True)\n", + " pred_str = ''\n", + " for word, prob in probs:\n", + " pred_str += f'{word}:{prob} '\n", + " pred_str += f':{log10(0.99)}'\n", + " return pred_str" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "913dcf54", + "metadata": {}, + "outputs": [], + "source": [ + "def make_prediction(path, result_path):\n", + " data = pd.read_csv(path, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n", + " with open(result_path, 'w', encoding='utf-8') as file_out:\n", + " for _, row in data.iterrows():\n", + " before, after = word_tokenize(data_preprocessing(str(row[6]))), word_tokenize(data_preprocessing(str(row[7])))\n", + " if len(before) < 2 or len(after) < 2:\n", + " pred = prediction\n", + " else:\n", + " pred = predict(before[-1], after[0])\n", + " file_out.write(pred + '\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01c1b58d", + "metadata": {}, + "outputs": [], + "source": [ + "make_prediction(\"dev-0/in.tsv.xz\", \"dev-0/out.tsv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d37cd24", + "metadata": {}, + "outputs": [], + "source": [ + "make_prediction(\"test-A/in.tsv.xz\", \"test-A/out.tsv\")" ] } ],