This commit is contained in:
Adam Wojdyla 2023-03-29 12:03:58 +02:00
parent be868f492b
commit 505c0bb9c2
19 changed files with 46 additions and 45 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.DS_STORE
out-merged.txt

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -1 +1 @@
{'l': (7, 0), 'p': (9, 4), 'D': (15, 320), 'E': (15, 321), _EOF: (16, 644), 'A': (16, 645), 'B': (16, 646), 'C': (16, 647), 'w': (13, 81), 'v': (12, 41), 't': (11, 21), 'r': (10, 11), 'n': (8, 3), 'j': (6, 1), 'h': (5, 1), 'f': (4, 1), 'd': (3, 1), 'b': (2, 1), 'u': (12, 2048), 'y': (14, 8196), 'z': (14, 8197), 'x': (13, 4099), 's': (11, 1025), 'q': (10, 513), 'o': (9, 257), 'm': (8, 129), 'k': (7, 65), 'i': (6, 33), 'g': (5, 17), 'e': (4, 9), 'c': (3, 5), 'a': (2, 3)}
{'p': (9, 0), 't': (11, 4), 'y': (13, 20), 'A': (15, 84), 'C': (15, 85), 'z': (14, 43), 'v': (12, 11), 'r': (10, 3), 'n': (8, 1), 'l': (7, 1), 'j': (6, 1), 'h': (5, 1), 'f': (4, 1), 'd': (3, 1), 'b': (2, 1), 'D': (16, 32768), 'H': (16, 32769), 'M': (16, 32770), _EOF: (17, 65542), 'B': (17, 65543), 'x': (14, 8193), 'w': (13, 4097), 'u': (12, 2049), 's': (11, 1025), 'q': (10, 513), 'o': (9, 257), 'm': (8, 129), 'k': (7, 65), 'i': (6, 33), 'g': (5, 17), 'e': (4, 9), 'c': (3, 5), 'a': (2, 3)}

View File

@ -1 +1 @@
{'7': (5, 0), _EOF: (7, 4), 'C': (7, 5), 'r': (6, 3), '1': (6, 4), 'y': (6, 5), 'Z': (6, 6), 'm': (6, 7), '5': (6, 8), 'J': (6, 9), 'Y': (6, 10), 'E': (6, 11), 'v': (6, 12), 'p': (6, 13), 'c': (6, 14), 'w': (6, 15), 'B': (6, 16), 'g': (6, 17), '3': (6, 18), 'x': (6, 19), 'q': (6, 20), 's': (6, 21), 'b': (6, 22), 'i': (6, 23), 'k': (6, 24), '2': (6, 25), '9': (6, 26), 'G': (6, 27), 'S': (6, 28), 'A': (6, 29), 'f': (6, 30), 'l': (6, 31), 'e': (6, 32), 'M': (6, 33), 'W': (6, 34), 'P': (6, 35), 'O': (6, 36), 'j': (6, 37), '0': (6, 38), 'u': (6, 39), 'T': (6, 40), '4': (6, 41), 'o': (6, 42), 'I': (6, 43), '6': (6, 44), 't': (6, 45), 'L': (6, 46), '8': (6, 47), ' ': (6, 48), 'V': (6, 49), 'h': (6, 50), 'Q': (6, 51), 'U': (6, 52), 'F': (6, 53), 'K': (6, 54), 'n': (6, 55), 'R': (6, 56), 'z': (6, 57), 'H': (6, 58), 'a': (6, 59), 'd': (6, 60), 'N': (6, 61), 'D': (6, 62), 'X': (6, 63)}
{'q': (5, 0), _EOF: (7, 4), 'y': (7, 5), 'f': (6, 3), 't': (6, 4), 'N': (6, 5), 'M': (6, 6), 'U': (6, 7), '0': (6, 8), '2': (6, 9), 'K': (6, 10), '9': (6, 11), 'A': (6, 12), 'm': (6, 13), '1': (6, 14), 'J': (6, 15), 'z': (6, 16), 'S': (6, 17), ' ': (6, 18), 'd': (6, 19), 'Y': (6, 20), 'O': (6, 21), 'x': (6, 22), '4': (6, 23), 'k': (6, 24), 'D': (6, 25), 'E': (6, 26), 'i': (6, 27), 'p': (6, 28), 'P': (6, 29), 'G': (6, 30), 'C': (6, 31), 'o': (6, 32), 'F': (6, 33), 'V': (6, 34), 'j': (6, 35), 'w': (6, 36), 'Z': (6, 37), 's': (6, 38), 'I': (6, 39), 'L': (6, 40), 'Q': (6, 41), 'r': (6, 42), 'l': (6, 43), 'H': (6, 44), 'T': (6, 45), 'g': (6, 46), 'e': (6, 47), 'B': (6, 48), '6': (6, 49), '5': (6, 50), 'R': (6, 51), 'X': (6, 52), 'b': (6, 53), '3': (6, 54), '8': (6, 55), 'c': (6, 56), 'v': (6, 57), 'a': (6, 58), 'n': (6, 59), '7': (6, 60), 'h': (6, 61), 'W': (6, 62), 'u': (6, 63)}

File diff suppressed because one or more lines are too long

View File

@ -1 +1 @@
{_EOF: (2, 0), '1': (2, 1), '0': (1, 1)}
{_EOF: (2, 0), '0': (2, 1), '1': (1, 1)}

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -53,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -100,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -123,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@ -133,13 +133,13 @@
"Compression complete. The compressed archive is saved as files_tar/own_corpus.tar.gz.\n",
"Compression ratio: 4.597193872860006\n",
"Compression complete. The compressed archive is saved as files_tar/random_text_geometric_distribution.tar.gz.\n",
"Compression ratio: 2.2354861064538483\n",
"Compression ratio: 2.238588793624499\n",
"Compression complete. The compressed archive is saved as files_tar/random_text_uniform_distribution.tar.gz.\n",
"Compression ratio: 1.3254319914218042\n",
"Compression ratio: 1.3254407753298358\n",
"Compression complete. The compressed archive is saved as files_tar/random_text_uniform_two_point_05_distribution.tar.gz.\n",
"Compression ratio: 6.656903208627346\n",
"Compression ratio: 6.656282865396648\n",
"Compression complete. The compressed archive is saved as files_tar/random_text_uniform_two_point_09_distribution.tar.gz.\n",
"Compression ratio: 12.2086705978586\n"
"Compression ratio: 12.23555898151207\n"
]
}
],
@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -172,10 +172,10 @@
"output_type": "stream",
"text": [
"Entropy for files_txt/own_corpus.txt: 1.754256\n",
"Entropy for files_txt/random_text_geometric_distribution.txt: 3.5624\n",
"Entropy for files_txt/random_text_uniform_distribution.txt: 6.033632\n",
"Entropy for files_txt/random_text_uniform_two_point_05_distribution.txt: 1.273352\n",
"Entropy for files_txt/random_text_uniform_two_point_09_distribution.txt: 0.761152\n"
"Entropy for files_txt/random_text_geometric_distribution.txt: 3.56064\n",
"Entropy for files_txt/random_text_uniform_distribution.txt: 6.0336\n",
"Entropy for files_txt/random_text_uniform_two_point_05_distribution.txt: 1.274304\n",
"Entropy for files_txt/random_text_uniform_two_point_09_distribution.txt: 0.75892\n"
]
}
],
@ -200,7 +200,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@ -211,17 +211,17 @@
"First 3: r e s\n",
"Binary: 0100 001 0001\n",
"Calculating Huffman code for file: files_txt/random_text_geometric_distribution.txt...\n",
"First 3: b c b\n",
"Binary: 01 101 01\n",
"First 3: d d d\n",
"Binary: 001 001 001\n",
"Calculating Huffman code for file: files_txt/random_text_uniform_distribution.txt...\n",
"First 3: 3 C t\n",
"Binary: 010010 0000101 101101\n",
"First 3: Q l M\n",
"Binary: 101001 101011 000110\n",
"Calculating Huffman code for file: files_txt/random_text_uniform_two_point_05_distribution.txt...\n",
"First 3: 1 0 0\n",
"Binary: 01 1 1\n",
"First 3: 0 0 0\n",
"Binary: 01 01 01\n",
"Calculating Huffman code for file: files_txt/random_text_uniform_two_point_09_distribution.txt...\n",
"First 3: 1 0 1\n",
"Binary: 1 01 1\n"
"First 3: 0 1 1\n",
"Binary: 01 1 1\n"
]
}
],
@ -270,7 +270,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -284,16 +284,16 @@
"Size of files_txt/random_text_uniform_two_point_09_distribution.txt: 1000000 bytes, 8000000 bits\n",
"********************************************************************************\n",
"Size of files_tar/own_corpus.tar.gz: 217524 bytes, 1740192 bits\n",
"Size of files_tar/random_text_geometric_distribution.tar.gz: 44733 bytes, 357864 bits\n",
"Size of files_tar/random_text_uniform_distribution.tar.gz: 754471 bytes, 6035768 bits\n",
"Size of files_tar/random_text_uniform_two_point_05_distribution.tar.gz: 150220 bytes, 1201760 bits\n",
"Size of files_tar/random_text_uniform_two_point_09_distribution.tar.gz: 81909 bytes, 655272 bits\n",
"Size of files_tar/random_text_geometric_distribution.tar.gz: 44671 bytes, 357368 bits\n",
"Size of files_tar/random_text_uniform_distribution.tar.gz: 754466 bytes, 6035728 bits\n",
"Size of files_tar/random_text_uniform_two_point_05_distribution.tar.gz: 150234 bytes, 1201872 bits\n",
"Size of files_tar/random_text_uniform_two_point_09_distribution.tar.gz: 81729 bytes, 653832 bits\n",
"********************************************************************************\n",
"Size of files_txt/own_corpus.txt + codetable: 544399 bytes, 548781 bits\n",
"Size of files_txt/random_text_geometric_distribution.txt + codetable: 37584 bytes, 40895 bits\n",
"Size of files_txt/random_text_uniform_distribution.txt + codetable: 750834 bytes, 757043 bits\n",
"Size of files_txt/random_text_uniform_two_point_05_distribution.txt + codetable: 187491 bytes, 187771 bits\n",
"Size of files_txt/random_text_uniform_two_point_09_distribution.txt + codetable: 137530 bytes, 137810 bits\n"
"Size of files_txt/random_text_geometric_distribution.txt + codetable: 37569 bytes, 41020 bits\n",
"Size of files_txt/random_text_uniform_distribution.txt + codetable: 750822 bytes, 757031 bits\n",
"Size of files_txt/random_text_uniform_two_point_05_distribution.txt + codetable: 187501 bytes, 187781 bits\n",
"Size of files_txt/random_text_uniform_two_point_09_distribution.txt + codetable: 137499 bytes, 137779 bits\n"
]
}
],
@ -380,7 +380,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 11,
"metadata": {},
"outputs": [
{
@ -389,7 +389,7 @@
"text": [
"Entropy for words in files_txt/own_corpus.txt: 9.27320212652544\n",
"Entropy for words in files_txt/random_text_geometric_distribution.txt: -0.0\n",
"Entropy for words in files_txt/random_text_uniform_distribution.txt: 13.897386156097086\n",
"Entropy for words in files_txt/random_text_uniform_distribution.txt: 13.889640822372847\n",
"Entropy for words in files_txt/random_text_uniform_two_point_05_distribution.txt: -0.0\n",
"Entropy for words in files_txt/random_text_uniform_two_point_09_distribution.txt: -0.0\n"
]