This commit is contained in:
Norbert Litkowski 2022-04-25 00:52:20 +02:00
parent b78257156a
commit 3e73ddf02d
3 changed files with 107 additions and 423 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
tmp
*.arpa
.ipynb_checkpoints* .ipynb_checkpoints*
*~ *~
*.swp *.swp

View File

528
run.ipynb
View File

@ -14,7 +14,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "5bf0e02b", "id": "032ba328",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -24,7 +24,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"id": "98ebf07f", "id": "e0d94073",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -34,7 +34,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,
"id": "42cb7bb1", "id": "7c055510",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -44,7 +44,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 5,
"id": "e6e0480e", "id": "bd81e581",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -54,7 +54,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 6,
"id": "464dc043", "id": "0c4a5486",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -63,8 +63,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"id": "f5115f59", "id": "aec319cd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -73,8 +73,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 8,
"id": "25585b08", "id": "9b794391",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -94,7 +94,7 @@
"Name: 607, Length: 432022, dtype: object" "Name: 607, Length: 432022, dtype: object"
] ]
}, },
"execution_count": 9, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -105,8 +105,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 15,
"id": "325a9592", "id": "f21d9139",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -117,8 +117,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 10,
"id": "08888fa3", "id": "362a6b83",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -130,7 +130,7 @@
"----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n", "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n",
"************************/home/me/kenlm/lm/builder/corpus_count.cc:179 in void lm::builder::{anonymous}::ComplainDisallowed(StringPiece, lm::WarningAction&) threw FormatLoadException.\n", "************************/home/me/kenlm/lm/builder/corpus_count.cc:179 in void lm::builder::{anonymous}::ComplainDisallowed(StringPiece, lm::WarningAction&) threw FormatLoadException.\n",
"Special word <s> is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.\n", "Special word <s> is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.\n",
"/bin/bash: linia 1: 3982 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n" "/bin/bash: linia 1: 5055 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n"
] ]
} }
], ],
@ -141,8 +141,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 11,
"id": "42a8d737", "id": "456fa286",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -151,432 +151,114 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 14,
"id": "311c90de", "id": "3eaaf27b",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Defaulting to user installation because normal site-packages is not writeable\n", "Loading the LM will be faster if you build a binary file.\n",
"Collecting kenlm\n", "Reading /home/me/challenging-america-word-gap-prediction-kenlm/model.arpa\n",
" Downloading kenlm-0.tar.gz (1.4 MB)\n", "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n"
" |████████████████████████████████| 1.4 MB 610 kB/s \n",
"\u001b[?25hBuilding wheels for collected packages: kenlm\n",
" Building wheel for kenlm (setup.py) ... \u001b[?25lerror\n",
"\u001b[31m ERROR: Command errored out with exit status 1:\n",
" command: /usr/bin/python -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"'; __file__='\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"';f=getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__);code=f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' bdist_wheel -d /tmp/pip-wheel-s72u5291\n",
" cwd: /tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/\n",
" Complete output (380 lines):\n",
" running bdist_wheel\n",
" running build\n",
" running build_ext\n",
" building 'kenlm' extension\n",
" creating build/temp.linux-x86_64-3.10\n",
" creating build/temp.linux-x86_64-3.10/lm\n",
" creating build/temp.linux-x86_64-3.10/python\n",
" creating build/temp.linux-x86_64-3.10/util\n",
" creating build/temp.linux-x86_64-3.10/util/double-conversion\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/bhiksha.cc -o build/temp.linux-x86_64-3.10/lm/bhiksha.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/binary_format.cc -o build/temp.linux-x86_64-3.10/lm/binary_format.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" lm/binary_format.cc: In member function void lm::ngram::BinaryFormat::FinishFile(const lm::ngram::Config&, lm::ngram::ModelType, unsigned int, const std::vector<long unsigned int>&):\n",
" lm/binary_format.cc:261:9: warning: void* memset(void*, int, size_t) clearing an object of type struct lm::ngram::Parameters with no trivial copy-assignment; use assignment or value-initialization instead [-Wclass-memaccess]\n",
" 261 | memset(&params, 0, sizeof(Parameters));\n",
" | ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" In file included from lm/binary_format.cc:1:\n",
" ./lm/binary_format.hh:42:8: note: struct lm::ngram::Parameters declared here\n",
" 42 | struct Parameters {\n",
" | ^~~~~~~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/config.cc -o build/temp.linux-x86_64-3.10/lm/config.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/lm_exception.cc -o build/temp.linux-x86_64-3.10/lm/lm_exception.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/model.cc -o build/temp.linux-x86_64-3.10/lm/model.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from ./lm/model.hh:13,\n",
" from lm/model.cc:1:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/quantize.cc -o build/temp.linux-x86_64-3.10/lm/quantize.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/read_arpa.cc -o build/temp.linux-x86_64-3.10/lm/read_arpa.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_hashed.cc -o build/temp.linux-x86_64-3.10/lm/search_hashed.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from ./lm/model.hh:13,\n",
" from lm/search_hashed.cc:6:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_trie.cc -o build/temp.linux-x86_64-3.10/lm/search_trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from lm/search_trie.cc:12:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/sizes.cc -o build/temp.linux-x86_64-3.10/lm/sizes.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from ./lm/model.hh:13,\n",
" from lm/sizes.cc:2:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie.cc -o build/temp.linux-x86_64-3.10/lm/trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie_sort.cc -o build/temp.linux-x86_64-3.10/lm/trie_sort.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from lm/trie_sort.cc:6:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/value_build.cc -o build/temp.linux-x86_64-3.10/lm/value_build.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from ./lm/model.hh:13,\n",
" from lm/value_build.cc:3:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/virtual_interface.cc -o build/temp.linux-x86_64-3.10/lm/virtual_interface.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/vocab.cc -o build/temp.linux-x86_64-3.10/lm/vocab.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from lm/vocab.cc:1:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" lm/vocab.cc:285:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 285 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" lm/vocab.cc:297:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 297 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c python/kenlm.cpp -o build/temp.linux-x86_64-3.10/python/kenlm.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
" In file included from ./lm/model.hh:13,\n",
" from python/kenlm.cpp:253:\n",
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
" | ^~~~~\n",
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
" | ^~~~~\n",
" In file included from /usr/include/python3.10/Python.h:74,\n",
" from python/kenlm.cpp:16:\n",
" python/kenlm.cpp: In function void __pyx_tp_dealloc_5kenlm_Model(PyObject*):\n",
" /usr/include/python3.10/object.h:133:33: error: lvalue required as increment operand\n",
" 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n",
" | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:4398:7: note: in expansion of macro Py_REFCNT\n",
" 4398 | ++Py_REFCNT(o);\n",
" | ^~~~~~~~~\n",
" /usr/include/python3.10/object.h:133:33: error: lvalue required as decrement operand\n",
" 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n",
" | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:4400:7: note: in expansion of macro Py_REFCNT\n",
" 4400 | --Py_REFCNT(o);\n",
" | ^~~~~~~~~\n",
" python/kenlm.cpp: In function PyObject* PyInit_kenlm():\n",
" python/kenlm.cpp:4876:37: error: PyTypeObject {aka struct _typeobject} has no member named tp_print\n",
" 4876 | __pyx_type_5kenlm_FullScoreReturn.tp_print = 0;\n",
" | ^~~~~~~~\n",
" python/kenlm.cpp:4880:27: error: PyTypeObject {aka struct _typeobject} has no member named tp_print\n",
" 4880 | __pyx_type_5kenlm_State.tp_print = 0;\n",
" | ^~~~~~~~\n",
" python/kenlm.cpp:4884:28: error: PyTypeObject {aka struct _typeobject} has no member named tp_print\n",
" 4884 | __pyx_type_5kenlm_Config.tp_print = 0;\n",
" | ^~~~~~~~\n",
" python/kenlm.cpp:4888:27: error: PyTypeObject {aka struct _typeobject} has no member named tp_print\n",
" 4888 | __pyx_type_5kenlm_Model.tp_print = 0;\n",
" | ^~~~~~~~\n",
" python/kenlm.cpp:4902:53: error: PyTypeObject {aka struct _typeobject} has no member named tp_print\n",
" 4902 | __pyx_type_5kenlm___pyx_scope_struct__full_scores.tp_print = 0;\n",
" | ^~~~~~~~\n",
" In file included from /usr/include/python3.10/unicodeobject.h:1046,\n",
" from /usr/include/python3.10/Python.h:83,\n",
" from python/kenlm.cpp:16:\n",
" python/kenlm.cpp: In function int __Pyx_ParseOptionalKeywords(PyObject*, PyObject***, PyObject*, PyObject**, Py_ssize_t, const char*):\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:22: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: Py_UNICODE* PyUnicode_AsUnicode(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:22: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
" | ^~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:22: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:52: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: Py_UNICODE* PyUnicode_AsUnicode(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:52: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
" | ^~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5396:52: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:26: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: Py_UNICODE* PyUnicode_AsUnicode(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:26: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
" | ^~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:26: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:59: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: Py_UNICODE* PyUnicode_AsUnicode(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:59: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
" | ^~~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: Py_ssize_t _PyUnicode_get_wstr_length(PyObject*) is deprecated [-Wdeprecated-declarations]\n",
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro PyUnicode_WSTR_LENGTH\n",
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
" | ^~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp:5412:59: note: in expansion of macro PyUnicode_GET_SIZE\n",
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
" | ^~~~~~~~~~~~~~~~~~\n",
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
" python/kenlm.cpp: In function void __Pyx_ExceptionSave(PyObject**, PyObject**, PyObject**):\n",
" python/kenlm.cpp:5583:21: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 5583 | *type = tstate->exc_type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:5584:22: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 5584 | *value = tstate->exc_value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:5585:19: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 5585 | *tb = tstate->exc_traceback;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp: In function void __Pyx_ExceptionReset(PyObject*, PyObject*, PyObject*):\n",
" python/kenlm.cpp:5597:24: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 5597 | tmp_type = tstate->exc_type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:5598:25: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 5598 | tmp_value = tstate->exc_value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:5599:22: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 5599 | tmp_tb = tstate->exc_traceback;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp:5600:13: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 5600 | tstate->exc_type = type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:5601:13: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 5601 | tstate->exc_value = value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:5602:13: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 5602 | tstate->exc_traceback = tb;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp: In function int __Pyx_GetException(PyObject**, PyObject**, PyObject**):\n",
" python/kenlm.cpp:5645:24: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 5645 | tmp_type = tstate->exc_type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:5646:25: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 5646 | tmp_value = tstate->exc_value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:5647:22: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 5647 | tmp_tb = tstate->exc_traceback;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp:5648:13: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 5648 | tstate->exc_type = local_type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:5649:13: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 5649 | tstate->exc_value = local_value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:5650:13: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 5650 | tstate->exc_traceback = local_tb;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp: In function void __Pyx_ExceptionSwap(PyObject**, PyObject**, PyObject**):\n",
" python/kenlm.cpp:6376:24: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 6376 | tmp_type = tstate->exc_type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:6377:25: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 6377 | tmp_value = tstate->exc_value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:6378:22: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 6378 | tmp_tb = tstate->exc_traceback;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" python/kenlm.cpp:6379:13: error: PyThreadState {aka struct _ts} has no member named exc_type; did you mean curexc_type?\n",
" 6379 | tstate->exc_type = *type;\n",
" | ^~~~~~~~\n",
" | curexc_type\n",
" python/kenlm.cpp:6380:13: error: PyThreadState {aka struct _ts} has no member named exc_value; did you mean curexc_value?\n",
" 6380 | tstate->exc_value = *value;\n",
" | ^~~~~~~~~\n",
" | curexc_value\n",
" python/kenlm.cpp:6381:13: error: PyThreadState {aka struct _ts} has no member named exc_traceback; did you mean curexc_traceback?\n",
" 6381 | tstate->exc_traceback = *tb;\n",
" | ^~~~~~~~~~~~~\n",
" | curexc_traceback\n",
" error: command '/usr/bin/gcc' failed with exit code 1\n",
" ----------------------------------------\u001b[0m\n",
"\u001b[31m ERROR: Failed building wheel for kenlm\u001b[0m\n",
"\u001b[?25h Running setup.py clean for kenlm\n"
] ]
}, },
{ {
"name": "stdout", "ename": "OSError",
"output_type": "stream", "evalue": "Cannot read model './model.arpa' (End of file Byte: 0)",
"text": [ "output_type": "error",
"Failed to build kenlm\n", "traceback": [
"Installing collected packages: kenlm\n", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
" Running setup.py install for kenlm ... \u001b[?25l-" "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32mkenlm.pyx:139\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: End of file Byte: 0",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [14]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkenlm\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mkenlm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mModel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./model.arpa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32mkenlm.pyx:142\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mOSError\u001b[0m: Cannot read model './model.arpa' (End of file Byte: 0)"
] ]
} }
], ],
"source": [ "source": [
"!pip install kenlm" "import kenlm\n",
"model = kenlm.Model(\"./model.arpa\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "a849ad70", "id": "b3a22dcd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import kenlm\n", "def predict(before, after, top_k=12, rest_mass=0.99):\n",
"model = kenlm.Model(\"\")" "    \"\"\"Rank candidate words for the gap between two context tokens.\n",
"\n",
"    Each word of the notebook-level `english_words_alpha_set` is placed between\n",
"    `before` and `after`; the resulting space-joined string is scored with the\n",
"    notebook-level KenLM `model`, without begin/end-of-sentence markers\n",
"    (bos=False, eos=False).\n",
"\n",
"    Args:\n",
"        before: token placed to the left of the candidate word.\n",
"        after: token placed to the right of the candidate word.\n",
"        top_k: number of best-scoring candidates to emit. Defaults to 12,\n",
"            the value that was previously hard-coded.\n",
"        rest_mass: value whose log10 is written in the trailing ':<value>'\n",
"            field. Defaults to 0.99, the value that was previously hard-coded.\n",
"\n",
"    Returns:\n",
"        One line of the form 'word:score word:score ... :<log10(rest_mass)>',\n",
"        with candidates ordered from highest to lowest score.\n",
"    \"\"\"\n",
"    # Function-scope import so the cell does not rely on another cell for it.\n",
"    import heapq\n",
"\n",
"    scored = (\n",
"        (word, model.score(' '.join([before, word, after]), bos=False, eos=False))\n",
"        for word in english_words_alpha_set\n",
"    )\n",
"    # nlargest returns the top_k pairs already sorted best-first; it replaces\n",
"    # the manual scan-for-the-worst-entry loop that ran for every candidate.\n",
"    # Ties may be ordered differently than before, but candidates come from a\n",
"    # set, so their relative order was never defined.\n",
"    best = heapq.nlargest(top_k, scored, key=lambda pair: pair[1])\n",
"\n",
"    fields = [f'{word}:{score}' for word, score in best]\n",
"    # `log10` is not imported in this cell; it must already be in scope.\n",
"    fields.append(f':{log10(rest_mass)}')\n",
"    return ' '.join(fields)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "913dcf54",
"metadata": {},
"outputs": [],
"source": [
"def make_prediction(path, result_path):\n",
"    \"\"\"Write one prediction line per row of a tab-separated input file.\n",
"\n",
"    Columns 6 and 7 of every row are converted to `str`, passed through\n",
"    `data_preprocessing` and `word_tokenize`, and bound to `before` / `after`.\n",
"    The last token of `before` and the first token of `after` are handed to\n",
"    `predict`. Rows where either side yields fewer than two tokens receive the\n",
"    notebook-level fallback string `prediction` instead.\n",
"\n",
"    Args:\n",
"        path: input file, read by pandas with tab separator, no header row\n",
"            and quoting disabled.\n",
"        result_path: output file, opened for writing as UTF-8; one line is\n",
"            written per input row.\n",
"    \"\"\"\n",
"    data = pd.read_csv(path, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n",
"    with open(result_path, 'w', encoding='utf-8') as file_out:\n",
"        # Walk only the two columns that are used. iterrows() would build a\n",
"        # Series for every row, which is slow and may upcast the values.\n",
"        for left_text, right_text in zip(data[6], data[7]):\n",
"            before = word_tokenize(data_preprocessing(str(left_text)))\n",
"            after = word_tokenize(data_preprocessing(str(right_text)))\n",
"            # NOTE(review): only one token per side is used below, yet two are\n",
"            # required here - confirm that this threshold is intentional.\n",
"            if len(before) < 2 or len(after) < 2:\n",
"                # `prediction` is not defined in this cell; it has to come\n",
"                # from an earlier cell of the notebook.\n",
"                pred = prediction\n",
"            else:\n",
"                pred = predict(before[-1], after[0])\n",
"            file_out.write(pred + '\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01c1b58d",
"metadata": {},
"outputs": [],
"source": [
"make_prediction(\"dev-0/in.tsv.xz\", \"dev-0/out.tsv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d37cd24",
"metadata": {},
"outputs": [],
"source": [
"make_prediction(\"test-A/in.tsv.xz\", \"test-A/out.tsv\")"
] ]
} }
], ],