wip
This commit is contained in:
parent
b78257156a
commit
3e73ddf02d
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
||||
tmp
|
||||
*.arpa
|
||||
.ipynb_checkpoints*
|
||||
*~
|
||||
*.swp
|
||||
|
528
run.ipynb
528
run.ipynb
@ -14,7 +14,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5bf0e02b",
|
||||
"id": "032ba328",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -24,7 +24,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "98ebf07f",
|
||||
"id": "e0d94073",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -34,7 +34,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "42cb7bb1",
|
||||
"id": "7c055510",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -44,7 +44,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e6e0480e",
|
||||
"id": "bd81e581",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -54,7 +54,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "464dc043",
|
||||
"id": "0c4a5486",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -63,8 +63,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f5115f59",
|
||||
"execution_count": 7,
|
||||
"id": "aec319cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -73,8 +73,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "25585b08",
|
||||
"execution_count": 8,
|
||||
"id": "9b794391",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -94,7 +94,7 @@
|
||||
"Name: 607, Length: 432022, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -105,8 +105,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "325a9592",
|
||||
"execution_count": 15,
|
||||
"id": "f21d9139",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -117,8 +117,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "08888fa3",
|
||||
"execution_count": 10,
|
||||
"id": "362a6b83",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -130,7 +130,7 @@
|
||||
"----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n",
|
||||
"************************/home/me/kenlm/lm/builder/corpus_count.cc:179 in void lm::builder::{anonymous}::ComplainDisallowed(StringPiece, lm::WarningAction&) threw FormatLoadException.\n",
|
||||
"Special word <s> is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.\n",
|
||||
"/bin/bash: linia 1: 3982 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n"
|
||||
"/bin/bash: linia 1: 5055 Przerwane (zrzut pamięci) ../kenlm/build//bin/lmplz -o 4 < tmp > model.arpa\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -141,8 +141,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "42a8d737",
|
||||
"execution_count": 11,
|
||||
"id": "456fa286",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -151,432 +151,114 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "311c90de",
|
||||
"execution_count": 14,
|
||||
"id": "3eaaf27b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||||
"Collecting kenlm\n",
|
||||
" Downloading kenlm-0.tar.gz (1.4 MB)\n",
|
||||
" |████████████████████████████████| 1.4 MB 610 kB/s \n",
|
||||
"\u001b[?25hBuilding wheels for collected packages: kenlm\n",
|
||||
" Building wheel for kenlm (setup.py) ... \u001b[?25lerror\n",
|
||||
"\u001b[31m ERROR: Command errored out with exit status 1:\n",
|
||||
" command: /usr/bin/python -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"'; __file__='\"'\"'/tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/setup.py'\"'\"';f=getattr(tokenize, '\"'\"'open'\"'\"', open)(__file__);code=f.read().replace('\"'\"'\\r\\n'\"'\"', '\"'\"'\\n'\"'\"');f.close();exec(compile(code, __file__, '\"'\"'exec'\"'\"'))' bdist_wheel -d /tmp/pip-wheel-s72u5291\n",
|
||||
" cwd: /tmp/pip-install-tpogj6ox/kenlm_6280e82d7a044d36906510f5646258a0/\n",
|
||||
" Complete output (380 lines):\n",
|
||||
" running bdist_wheel\n",
|
||||
" running build\n",
|
||||
" running build_ext\n",
|
||||
" building 'kenlm' extension\n",
|
||||
" creating build/temp.linux-x86_64-3.10\n",
|
||||
" creating build/temp.linux-x86_64-3.10/lm\n",
|
||||
" creating build/temp.linux-x86_64-3.10/python\n",
|
||||
" creating build/temp.linux-x86_64-3.10/util\n",
|
||||
" creating build/temp.linux-x86_64-3.10/util/double-conversion\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/bhiksha.cc -o build/temp.linux-x86_64-3.10/lm/bhiksha.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/binary_format.cc -o build/temp.linux-x86_64-3.10/lm/binary_format.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" lm/binary_format.cc: In member function ‘void lm::ngram::BinaryFormat::FinishFile(const lm::ngram::Config&, lm::ngram::ModelType, unsigned int, const std::vector<long unsigned int>&)’:\n",
|
||||
" lm/binary_format.cc:261:9: warning: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘struct lm::ngram::Parameters’ with no trivial copy-assignment; use assignment or value-initialization instead [-Wclass-memaccess]\n",
|
||||
" 261 | memset(¶ms, 0, sizeof(Parameters));\n",
|
||||
" | ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" In file included from lm/binary_format.cc:1:\n",
|
||||
" ./lm/binary_format.hh:42:8: note: ‘struct lm::ngram::Parameters’ declared here\n",
|
||||
" 42 | struct Parameters {\n",
|
||||
" | ^~~~~~~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/config.cc -o build/temp.linux-x86_64-3.10/lm/config.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/lm_exception.cc -o build/temp.linux-x86_64-3.10/lm/lm_exception.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/model.cc -o build/temp.linux-x86_64-3.10/lm/model.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from ./lm/model.hh:13,\n",
|
||||
" from lm/model.cc:1:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/quantize.cc -o build/temp.linux-x86_64-3.10/lm/quantize.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/read_arpa.cc -o build/temp.linux-x86_64-3.10/lm/read_arpa.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_hashed.cc -o build/temp.linux-x86_64-3.10/lm/search_hashed.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from ./lm/model.hh:13,\n",
|
||||
" from lm/search_hashed.cc:6:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/search_trie.cc -o build/temp.linux-x86_64-3.10/lm/search_trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from lm/search_trie.cc:12:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/sizes.cc -o build/temp.linux-x86_64-3.10/lm/sizes.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from ./lm/model.hh:13,\n",
|
||||
" from lm/sizes.cc:2:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie.cc -o build/temp.linux-x86_64-3.10/lm/trie.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/trie_sort.cc -o build/temp.linux-x86_64-3.10/lm/trie_sort.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from lm/trie_sort.cc:6:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/value_build.cc -o build/temp.linux-x86_64-3.10/lm/value_build.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from ./lm/model.hh:13,\n",
|
||||
" from lm/value_build.cc:3:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/virtual_interface.cc -o build/temp.linux-x86_64-3.10/lm/virtual_interface.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c lm/vocab.cc -o build/temp.linux-x86_64-3.10/lm/vocab.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from lm/vocab.cc:1:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" lm/vocab.cc:285:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 285 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" lm/vocab.cc:297:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 297 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -ffat-lto-objects -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -march=x86-64 -mtune=generic -O3 -pipe -fno-plt -fexceptions -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -fstack-clash-protection -fcf-protection -flto=auto -fPIC -I. -I/usr/include/python3.10 -c python/kenlm.cpp -o build/temp.linux-x86_64-3.10/python/kenlm.o -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 -std=c++11 -DHAVE_ZLIB -DHAVE_BZLIB -DHAVE_XZLIB\n",
|
||||
" In file included from ./lm/model.hh:13,\n",
|
||||
" from python/kenlm.cpp:253:\n",
|
||||
" ./lm/vocab.hh:210:43: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 210 | void MissingUnknown(const Config &config) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:211:67: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 211 | void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);\n",
|
||||
" | ^~~~~\n",
|
||||
" ./lm/vocab.hh:213:85: warning: dynamic exception specifications are deprecated in C++11 [-Wdeprecated]\n",
|
||||
" 213 | template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {\n",
|
||||
" | ^~~~~\n",
|
||||
" In file included from /usr/include/python3.10/Python.h:74,\n",
|
||||
" from python/kenlm.cpp:16:\n",
|
||||
" python/kenlm.cpp: In function ‘void __pyx_tp_dealloc_5kenlm_Model(PyObject*)’:\n",
|
||||
" /usr/include/python3.10/object.h:133:33: error: lvalue required as increment operand\n",
|
||||
" 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n",
|
||||
" | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:4398:7: note: in expansion of macro ‘Py_REFCNT’\n",
|
||||
" 4398 | ++Py_REFCNT(o);\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" /usr/include/python3.10/object.h:133:33: error: lvalue required as decrement operand\n",
|
||||
" 133 | #define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST_CONST(ob))\n",
|
||||
" | ~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:4400:7: note: in expansion of macro ‘Py_REFCNT’\n",
|
||||
" 4400 | --Py_REFCNT(o);\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" python/kenlm.cpp: In function ‘PyObject* PyInit_kenlm()’:\n",
|
||||
" python/kenlm.cpp:4876:37: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n",
|
||||
" 4876 | __pyx_type_5kenlm_FullScoreReturn.tp_print = 0;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" python/kenlm.cpp:4880:27: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n",
|
||||
" 4880 | __pyx_type_5kenlm_State.tp_print = 0;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" python/kenlm.cpp:4884:28: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n",
|
||||
" 4884 | __pyx_type_5kenlm_Config.tp_print = 0;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" python/kenlm.cpp:4888:27: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n",
|
||||
" 4888 | __pyx_type_5kenlm_Model.tp_print = 0;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" python/kenlm.cpp:4902:53: error: ‘PyTypeObject’ {aka ‘struct _typeobject’} has no member named ‘tp_print’\n",
|
||||
" 4902 | __pyx_type_5kenlm___pyx_scope_struct__full_scores.tp_print = 0;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" In file included from /usr/include/python3.10/unicodeobject.h:1046,\n",
|
||||
" from /usr/include/python3.10/Python.h:83,\n",
|
||||
" from python/kenlm.cpp:16:\n",
|
||||
" python/kenlm.cpp: In function ‘int __Pyx_ParseOptionalKeywords(PyObject*, PyObject***, PyObject*, PyObject**, Py_ssize_t, const char*)’:\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
|
||||
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:22: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
|
||||
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5396:52: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5396 | (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
|
||||
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:26: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:261:7: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 261 | PyUnicode_WSTR_LENGTH(op) : \\\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:262:33: warning: ‘Py_UNICODE* PyUnicode_AsUnicode(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 262 | ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\\\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:580:45: note: declared here\n",
|
||||
" 580 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:451:61: warning: ‘Py_ssize_t _PyUnicode_get_wstr_length(PyObject*)’ is deprecated [-Wdeprecated-declarations]\n",
|
||||
" 451 | #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)\n",
|
||||
" | ~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:264:8: note: in expansion of macro ‘PyUnicode_WSTR_LENGTH’\n",
|
||||
" 264 | PyUnicode_WSTR_LENGTH(op)))\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp:5412:59: note: in expansion of macro ‘PyUnicode_GET_SIZE’\n",
|
||||
" 5412 | (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~\n",
|
||||
" /usr/include/python3.10/cpython/unicodeobject.h:446:26: note: declared here\n",
|
||||
" 446 | static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {\n",
|
||||
" | ^~~~~~~~~~~~~~~~~~~~~~~~~~\n",
|
||||
" python/kenlm.cpp: In function ‘void __Pyx_ExceptionSave(PyObject**, PyObject**, PyObject**)’:\n",
|
||||
" python/kenlm.cpp:5583:21: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 5583 | *type = tstate->exc_type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:5584:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 5584 | *value = tstate->exc_value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:5585:19: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 5585 | *tb = tstate->exc_traceback;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp: In function ‘void __Pyx_ExceptionReset(PyObject*, PyObject*, PyObject*)’:\n",
|
||||
" python/kenlm.cpp:5597:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 5597 | tmp_type = tstate->exc_type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:5598:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 5598 | tmp_value = tstate->exc_value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:5599:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 5599 | tmp_tb = tstate->exc_traceback;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp:5600:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 5600 | tstate->exc_type = type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:5601:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 5601 | tstate->exc_value = value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:5602:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 5602 | tstate->exc_traceback = tb;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp: In function ‘int __Pyx_GetException(PyObject**, PyObject**, PyObject**)’:\n",
|
||||
" python/kenlm.cpp:5645:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 5645 | tmp_type = tstate->exc_type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:5646:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 5646 | tmp_value = tstate->exc_value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:5647:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 5647 | tmp_tb = tstate->exc_traceback;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp:5648:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 5648 | tstate->exc_type = local_type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:5649:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 5649 | tstate->exc_value = local_value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:5650:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 5650 | tstate->exc_traceback = local_tb;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp: In function ‘void __Pyx_ExceptionSwap(PyObject**, PyObject**, PyObject**)’:\n",
|
||||
" python/kenlm.cpp:6376:24: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 6376 | tmp_type = tstate->exc_type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:6377:25: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 6377 | tmp_value = tstate->exc_value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:6378:22: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 6378 | tmp_tb = tstate->exc_traceback;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" python/kenlm.cpp:6379:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_type’; did you mean ‘curexc_type’?\n",
|
||||
" 6379 | tstate->exc_type = *type;\n",
|
||||
" | ^~~~~~~~\n",
|
||||
" | curexc_type\n",
|
||||
" python/kenlm.cpp:6380:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_value’; did you mean ‘curexc_value’?\n",
|
||||
" 6380 | tstate->exc_value = *value;\n",
|
||||
" | ^~~~~~~~~\n",
|
||||
" | curexc_value\n",
|
||||
" python/kenlm.cpp:6381:13: error: ‘PyThreadState’ {aka ‘struct _ts’} has no member named ‘exc_traceback’; did you mean ‘curexc_traceback’?\n",
|
||||
" 6381 | tstate->exc_traceback = *tb;\n",
|
||||
" | ^~~~~~~~~~~~~\n",
|
||||
" | curexc_traceback\n",
|
||||
" error: command '/usr/bin/gcc' failed with exit code 1\n",
|
||||
" ----------------------------------------\u001b[0m\n",
|
||||
"\u001b[31m ERROR: Failed building wheel for kenlm\u001b[0m\n",
|
||||
"\u001b[?25h Running setup.py clean for kenlm\n"
|
||||
"Loading the LM will be faster if you build a binary file.\n",
|
||||
"Reading /home/me/challenging-america-word-gap-prediction-kenlm/model.arpa\n",
|
||||
"----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to build kenlm\n",
|
||||
"Installing collected packages: kenlm\n",
|
||||
" Running setup.py install for kenlm ... \u001b[?25l-"
|
||||
"ename": "OSError",
|
||||
"evalue": "Cannot read model './model.arpa' (End of file Byte: 0)",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32mkenlm.pyx:139\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mRuntimeError\u001b[0m: End of file Byte: 0",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
||||
"Input \u001b[0;32mIn [14]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkenlm\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mkenlm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mModel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./model.arpa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32mkenlm.pyx:142\u001b[0m, in \u001b[0;36mkenlm.Model.__init__\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: Cannot read model './model.arpa' (End of file Byte: 0)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install kenlm"
|
||||
"import kenlm\n",
|
||||
"model = kenlm.Model(\"./model.arpa\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a849ad70",
|
||||
"id": "b3a22dcd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import kenlm\n",
|
||||
"model = kenlm.Model(\"\")"
|
||||
"def predict(before, after):\n",
|
||||
" result = ''\n",
|
||||
" prob = 0.0\n",
|
||||
" best = []\n",
|
||||
" for word in english_words_alpha_set:\n",
|
||||
" text = ' '.join([before, word, after])\n",
|
||||
" text_score = model.score(text, bos=False, eos=False)\n",
|
||||
" if len(best) < 12:\n",
|
||||
" best.append((word, text_score))\n",
|
||||
" else:\n",
|
||||
" is_better = False\n",
|
||||
" worst_score = None\n",
|
||||
" for score in best:\n",
|
||||
" if not worst_score:\n",
|
||||
" worst_score = score\n",
|
||||
" else:\n",
|
||||
" if worst_score[1] > score[1]:\n",
|
||||
" worst_score = score\n",
|
||||
" if worst_score[1] < text_score:\n",
|
||||
" best.remove(worst_score)\n",
|
||||
" best.append((word, text_score))\n",
|
||||
" probs = sorted(best, key=lambda tup: tup[1], reverse=True)\n",
|
||||
" pred_str = ''\n",
|
||||
" for word, prob in probs:\n",
|
||||
" pred_str += f'{word}:{prob} '\n",
|
||||
" pred_str += f':{log10(0.99)}'\n",
|
||||
" return pred_str"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "913dcf54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_prediction(path, result_path):\n",
|
||||
" data = pd.read_csv(path, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n",
|
||||
" with open(result_path, 'w', encoding='utf-8') as file_out:\n",
|
||||
" for _, row in data.iterrows():\n",
|
||||
" before, after = word_tokenize(data_preprocessing(str(row[6]))), word_tokenize(data_preprocessing(str(row[7])))\n",
|
||||
" if len(before) < 2 or len(after) < 2:\n",
|
||||
" pred = prediction\n",
|
||||
" else:\n",
|
||||
" pred = predict(before[-1], after[0])\n",
|
||||
" file_out.write(pred + '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "01c1b58d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"make_prediction(\"dev-0/in.tsv.xz\", \"dev-0/out.tsv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6d37cd24",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"make_prediction(\"test-A/in.tsv.xz\", \"test-A/out.tsv\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
Loading…
Reference in New Issue
Block a user