Update notebook 08

This commit is contained in:
Ryszard Staruch 2024-12-04 16:46:08 +01:00
parent 518b2764e7
commit 12adb37a91

View File

@ -77,7 +77,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 79,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -118,7 +118,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 90,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -128,10 +128,10 @@
"Tekst 'cat' jest konwertowany do tokenu 9246\n", "Tekst 'cat' jest konwertowany do tokenu 9246\n",
"\n", "\n",
"Tokenizacja\n", "Tokenizacja\n",
"{'input_ids': [33215], 'attention_mask': [1]}\n", "{'input_ids': [9246], 'attention_mask': [1]}\n",
"\n", "\n",
"Detokenizacja\n", "Detokenizacja\n",
"computer\n", "cat\n",
"\n", "\n",
"Liczba tokenów w słowniku\n", "Liczba tokenów w słowniku\n",
"50257\n" "50257\n"
@ -141,9 +141,9 @@
"source": [ "source": [
"print(\"Tekst 'cat' jest konwertowany do tokenu 9246\")\n", "print(\"Tekst 'cat' jest konwertowany do tokenu 9246\")\n",
"print(\"\\nTokenizacja\")\n", "print(\"\\nTokenizacja\")\n",
"print(tokenizer(\"computer\"))\n", "print(tokenizer(\"cat\"))\n",
"print(\"\\nDetokenizacja\")\n", "print(\"\\nDetokenizacja\")\n",
"print(tokenizer.decode([33215]))\n", "print(tokenizer.decode([9246]))\n",
"print(\"\\nLiczba tokenów w słowniku\")\n", "print(\"\\nLiczba tokenów w słowniku\")\n",
"print(len(tokenizer))" "print(len(tokenizer))"
] ]
@ -274,7 +274,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 89,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {