Adam Wojdyla 2023-03-29 04:08:59 +02:00
parent 7efcacbe53
commit c9d19f350c
29 changed files with 2457 additions and 28 deletions

View File

@@ -1,6 +1,6 @@
 import pandas
 import regex as re
-import argparse, sys
+import argparse

 parser=argparse.ArgumentParser()
 parser.add_argument("--filepath",)
@@ -19,15 +19,15 @@ def filter_line(line):
     return line is not None and len(line) > 30 and is_letter_sentence(line) and is_asci(line)

 def clean_with_regex(text):
-    text = str(text).encode("ascii", "ignore").decode("utf-8")
-    regex_pattern = "(?<=..\.)(\s+)(?=\(\d+\))|(?<=..\.)(\s+)(?=\d\.)|(?<=..\.)(\s+)(?=Article \d+)"
+    # text = str(text).encode("ascii", "ignore").decode("utf-8")
+    regex_pattern = r"(?<=..\.)(\s+)(?=\(\d+\))|(?<=..\.)(\s+)(?=\d\.)|(?<=..\.)(\s+)(?=Article \d+)"
     try:
         out = re.split(regex_pattern, text)
     except TypeError as e:
         return []
     out = list(filter(lambda item: filter_line(item), out))
-    out = list(map(lambda item: re.sub("(?<=\d)(\(\d+\))(?=\s+)|(\(\d+\)\s+)|(\d+\.)+\s", "", item), out))
-    out = list(map(lambda item: re.sub("[^\w\d\s\\\)\(\/-]", "", item), out))
+    out = list(map(lambda item: re.sub(r"(?<=\d)(\(\d+\))(?=\s+)|(\(\d+\)\s+)|(\d+\.)+\s", " ", item), out))
+    out = list(map(lambda item: re.sub(r"[^\w\d\s\\\)\(\/-]|[^\x00-\x7F]|ex\d+", " ", item), out))
     if out:
         out.pop(len(out)-1)
     return out
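The tightened pattern above now also blanks out non-ASCII characters and stray `ex` + digit tokens. A minimal sketch of its effect (the sample sentence is invented for illustration):

```python
import regex as re

# Same character filter as in the new version: drop anything outside word
# chars, digits, whitespace, backslash, parentheses, slash and hyphen, plus
# any non-ASCII character and "ex" followed by digits.
pattern = r"[^\w\d\s\\\)\(\/-]|[^\x00-\x7F]|ex\d+"
sample = "Article 5(1): Member States — see ex123 — shall apply…"
print(re.sub(pattern, " ", sample))
# the colon, the em dashes, the ellipsis and "ex123" collapse to spaces
```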

View File

@@ -1,5 +1,11 @@
 # Statistics

+## Running the script
+Run the Python script statistics.py. The program writes the generated images to the /images folder.
+
+```python statistics.py --filepath {path_to_file}```
+
 ## Basic statistics
 ### 10 longest words

Binary file not shown. (Before: 14 KiB | After: 12 KiB)

Binary file not shown. (Before: 14 KiB | After: 13 KiB)

Binary file not shown. (Before: 16 KiB | After: 16 KiB)

Binary file not shown. (Before: 17 KiB | After: 17 KiB)

View File

@@ -3,14 +3,22 @@ from collections import Counter
 from collections import OrderedDict
 import regex as re
 from math import log
+import argparse
+import os

-file_path = "Lab1/out-merged.txt"
+parser=argparse.ArgumentParser()
+parser.add_argument("--filepath")
+args=parser.parse_args()
+
+FILE_PATH = "Lab1/out-merged.txt" if args.filepath is None else args.filepath
+IMAGES_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images")

 file_content = None
-with open(file_path, 'r') as file:
+with open(FILE_PATH, 'r') as file:
     file_content = file.read()
-    file.close()

-# file_content = file_content[:100]
+# file_content = file_content[:10000000]

 def get_characters(t):
     yield from t
@@ -36,9 +44,9 @@ def rang_freq_with_labels(name, g, top=None):
     plt.ylabel('liczba wystąpień')
     plt.bar(freq.keys(), freq.values())
-    fname = f'Lab2/images/{name}.png'
-    plt.savefig(fname)
+    fname = f'/{name}.png'
+    plt.savefig(IMAGES_PATH + fname)
     return fname
@@ -48,9 +56,9 @@ def log_rang_log_freq(name, g):
     plt.figure().clear()
     plt.plot([log(x) for x in range(1, len(freq.values())+1)], [log(y) for y in freq.values()])
-    fname = f'Lab2/images/{name}.png'
-    plt.savefig(fname)
+    fname = f'/{name}.png'
+    plt.savefig(IMAGES_PATH + fname)
     return fname
@@ -67,15 +75,15 @@ def get_ngrams(t, size):
         for m in ngrams(word, size):
             yield m

-def get_w_freq_by_w_len(word_len):
+def get_w_freq_by_w_len(freq, word_len):
     for word, count in freq.items():
         if len(word) == word_len:
             yield (count, word)

-def get_average_freq_by_w_len(word_lenghts):
+def get_average_freq_by_w_len(freq, word_lenghts):
     results = dict()
     for l in word_lenghts:
-        word_freq = list(get_w_freq_by_w_len(l))
+        word_freq = list(get_w_freq_by_w_len(freq, l))
         if len(word_freq) == 0:
             continue
         average = sum([w[0] for w in word_freq]) / len(word_freq)
@@ -83,20 +91,20 @@ def get_average_freq_by_w_len(word_lenghts):
     return results

-def get_low_high_freq_by_w_len(word_lenghts):
+def get_low_high_freq_by_w_len(freq, word_lenghts, average_freq):
     """
     Returns top 5 most frequent and non frequent words for each word length + average frequency.
     """
     results = []
     for l in word_lenghts:
-        word_freq = list(get_w_freq_by_w_len(l))
+        word_freq = list(get_w_freq_by_w_len(freq, l))
         word_freq.sort()
         word_freq = list(filter(lambda t: re.findall("\d",str(t[1])) == [] and t[0] > 30, word_freq))
         word_stats = {
             'word_len': l,
             'average_freq': average_freq[l],
-            'low_freq': word_freq[:10],
-            'high_freq': word_freq[-10:]
+            'low_freq': word_freq[:5],
+            'high_freq': word_freq[-5:]
         }
         results.append(word_stats)
     return results
@@ -111,7 +119,7 @@ def get_pronouns_stats(freqs):
     plt.figure(figsize=(12, 3))
     plt.ylabel('liczba wystąpień')
     plt.bar(x, y)
-    plt.savefig("Lab2/images/pt-pronouns.png")
+    plt.savefig(IMAGES_PATH + "/pt-pronouns.png")
     return pronoun_words_freq
@@ -123,31 +131,48 @@ def get_years_stats(freqs):
     plt.figure(figsize=(12, 3))
     plt.ylabel('liczba wystąpień')
     plt.bar(x, y)
-    plt.savefig("Lab2/images/pt-years.png")
+    plt.savefig(IMAGES_PATH + "/pt-years.png")
     return years_word_freq

+def get_longest_words(top):
+    all_words = list(get_words(file_content))
+    deduplicated_word_listr = [*set(all_words)]
+    deduplicated_word_listr.sort(key=len)
+    deduplicated_word_listr.reverse()
+    return deduplicated_word_listr[:top]
+
 print("Generating statistics...")

+# 10 longest words
+print("Calculating 10 longest words...")
+print(get_longest_words(10))
+
 # 10 most frequent words in the text
-rang_freq_with_labels('most-freq-words-20', get_words(file_content), top=20)
+print("Calculating 10 most frequent words in the text...")
+rang_freq_with_labels('most-freq-words-10', get_words(file_content), top=10)

 # Zipf's law
+print("Calculating Zipf's law...")
 log_rang_log_freq('zipf-law-words', get_words(file_content))

 # Zipf's law for 3-grams
+print("Calculating Zipf's law for 3-grams...")
 log_rang_log_freq('zipf-law-3grams', get_ngrams(file_content, 3))

 # Words breaking the Zipf's law
+print("Calculating words breaking the Zipf's law...")
 freq = freq_list(get_words(file_content))
 lenghts = [*set(len(f[0]) for f in freq.items())]
-average_freq = get_average_freq_by_w_len(lenghts)
-get_low_high_freq_by_w_len(lenghts)
+average_freq = get_average_freq_by_w_len(freq, lenghts)
+get_low_high_freq_by_w_len(freq, lenghts, average_freq)

 # Frequency of pronouns
+print("Calculating frequency of pronouns...")
 get_pronouns_stats(freq)
-print("Done")

 # Number of years in words
+print("Calculating number of years in words...")
 get_years_stats(freq)
+print("Done")

BIN
Lab3/DrzewoHuffmana.png Normal file

Binary file not shown. (After: 149 KiB)

409
Lab3/lab3_solution.ipynb Normal file
View File

@@ -0,0 +1,409 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Zadanie 1"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generowanie plików"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import string\n",
"\n",
"# Set the length of the string to generate\n",
"string_length = 1000000\n",
"\n",
"# Define the character set to choose from\n",
"character_set = np.array(list(string.ascii_letters + string.digits))"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"with open(\"../Lab1/out-merged.txt\", 'r') as file:\n",
" file_content = file.read()\n",
" first_chars = file_content[:string_length]\n",
"\n",
" with open(\"./own_corpus.txt\", 'w') as f:\n",
" f.write(first_chars)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"# Generate the random string using uniform distribution\n",
"random_indices = np.random.uniform(low=0, high=len(character_set), size=string_length).astype(int)\n",
"random_string = ''.join(character_set[random_indices])\n",
"\n",
"with open('random_text_uniform_distribution.txt', 'w') as f:\n",
" f.write(random_string)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"# Generate the random string using geometric distribution\n",
"p = 0.3\n",
"random_integers = np.random.geometric(p, 100000)\n",
"random_indices = [i - 1 for i in random_integers]\n",
"random_characters = [character_set[i % len(character_set)] for i in random_indices]\n",
"random_string = ''.join(random_characters)\n",
"\n",
"\n",
"with open('random_text_geometric_distribution.txt', 'w') as f:\n",
" f.write(random_string)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"# Generate the random string using uniform two-point distribution with p=0.5\n",
"character_set = np.array(list('01'))\n",
"random_indices = np.random.choice([0, len(character_set)-1], size=string_length, p=[0.5, 0.5])\n",
"random_string = ''.join(character_set[random_indices])\n",
"\n",
"with open('random_text_uniform_two_point_05_distribution.txt', 'w') as f:\n",
" f.write(random_string)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# Generate the random string using uniform two-point distribution with p=0.9\n",
"character_set = np.array(list('01'))\n",
"random_indices = np.random.choice([0, len(character_set)-1], size=string_length, p=[0.1, 0.9])\n",
"random_string = ''.join(character_set[random_indices])\n",
"\n",
"with open('random_text_uniform_two_point_09_distribution.txt', 'w') as f:\n",
" f.write(random_string)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compress files to .tar"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Compression complete. The compressed archive is saved as own_corpus.tar.gz.\n",
"Compression ratio: 4.59738408845367\n",
"Compression complete. The compressed archive is saved as random_text_uniform_distribution.tar.gz.\n",
"Compression ratio: 1.3293011199361935\n",
"Compression complete. The compressed archive is saved as random_text_geometric_distribution.tar.gz.\n",
"Compression ratio: 2.2415996054784695\n",
"Compression complete. The compressed archive is saved as random_text_uniform_two_point_05_distribution.tar.gz.\n",
"Compression ratio: 6.6557955339611965\n",
"Compression complete. The compressed archive is saved as random_text_uniform_two_point_09_distribution.tar.gz.\n",
"Compression ratio: 12.250398137939483\n"
]
}
],
"source": [
"import tarfile\n",
"import os\n",
"\n",
"def compress_file(file_name):\n",
" output_archive_name = file_name.replace('.txt', '.tar.gz')\n",
" with tarfile.open(output_archive_name, 'w:gz') as tar:\n",
" tar.add(file_name)\n",
"\n",
" print(f'Compression complete. The compressed archive is saved as {output_archive_name}.')\n",
" print(f'Compression ratio: {os.path.getsize(file_name) / os.path.getsize(output_archive_name)}')\n",
"\n",
"file_names = ['own_corpus.txt', 'random_text_uniform_distribution.txt', 'random_text_geometric_distribution.txt', 'random_text_uniform_two_point_05_distribution.txt', 'random_text_uniform_two_point_09_distribution.txt']\n",
"for file in file_names:\n",
" compress_file(file)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Entropy for own_corpus.txt: 1.754256\n",
"Entropy for random_text_uniform_distribution.txt: 6.016072\n",
"Entropy for random_text_geometric_distribution.txt: 3.54952\n",
"Entropy for random_text_uniform_two_point_05_distribution.txt: 1.272664\n",
"Entropy for random_text_uniform_two_point_09_distribution.txt: 0.761104\n"
]
}
],
"source": [
"import zlib\n",
"\n",
"def entropy_by_compression(t):\n",
" compressed = zlib.compress(t.encode('utf-8'))\n",
" return 8 * len(compressed) / len(t)\n",
"\n",
"for file in file_names:\n",
" print(f\"Entropy for {file}: {entropy_by_compression(open(file, 'r').read())}\")"
]
},
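{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Sanity check (a minimal sketch, not executed here): zlib and gzip both use DEFLATE, so the bits per character implied by each tar.gz compression ratio should roughly match `entropy_by_compression`, up to the fixed tar/gzip header overhead."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bits per character implied by each tar.gz ratio; values should\n",
"# land close to the entropy_by_compression figures above.\n",
"for file in file_names:\n",
"    archive = file.replace('.txt', '.tar.gz')\n",
"    ratio = os.path.getsize(file) / os.path.getsize(archive)\n",
"    print(f\"{file}: {8 / ratio:.3f} bits per character via tar.gz\")"
]
},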
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare file sizes"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size of own_corpus.txt: 1000000 bytes, 8000000 bits\n",
"Size of random_text_uniform_distribution.txt: 1000000 bytes, 8000000 bits\n",
"Size of random_text_geometric_distribution.txt: 100000 bytes, 800000 bits\n",
"Size of random_text_uniform_two_point_05_distribution.txt: 1000000 bytes, 8000000 bits\n",
"Size of random_text_uniform_two_point_09_distribution.txt: 1000000 bytes, 8000000 bits\n",
"********************************************************************************\n",
"Size of own_corpus.tar.gz: 217515 bytes, 1740120 bits\n",
"Size of random_text_uniform_distribution.tar.gz: 752275 bytes, 6018200 bits\n",
"Size of random_text_geometric_distribution.tar.gz: 44611 bytes, 356888 bits\n",
"Size of random_text_uniform_two_point_05_distribution.tar.gz: 150245 bytes, 1201960 bits\n",
"Size of random_text_uniform_two_point_09_distribution.tar.gz: 81630 bytes, 653040 bits\n",
"********************************************************************************\n",
"Size of own_corpus.txt + codetable: 544399 bytes, 548781 bits\n",
"Size of random_text_uniform_distribution.txt + codetable: 748749 bytes, 754867 bits\n",
"Size of random_text_geometric_distribution.txt + codetable: 37470 bytes, 40788 bits\n",
"Size of random_text_uniform_two_point_05_distribution.txt + codetable: 187473 bytes, 187753 bits\n",
"Size of random_text_uniform_two_point_09_distribution.txt + codetable: 137531 bytes, 137811 bits\n"
]
}
],
"source": [
"# print raw text files sizes\n",
"for file in file_names:\n",
" print(f\"Size of {file}: {os.path.getsize(file)} bytes, {os.path.getsize(file)*8} bits\")\n",
"\n",
"print(\"*\" * 80)\n",
"\n",
"# print compressed text files sizes\n",
"for file in file_names:\n",
" file = file.replace('.txt', '.tar.gz')\n",
" print(f\"Size of {file}: {os.path.getsize(file)} bytes, {os.path.getsize(file)*8} bits\")\n",
"\n",
"print(\"*\" * 80)\n",
"\n",
"# print compressed with Huffman text files sizes\n",
"for file in file_names:\n",
" file1 = file.replace('.txt', '.bin')\n",
" file2 = file.replace('.txt', '_codetable.bin')\n",
" print(f\"Size of {file} + codetable: {os.path.getsize(file1) + os.path.getsize(file2)} bytes, {os.path.getsize(file1) + os.path.getsize(file2)*8} bits\")\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate Huffman code"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Calculating Huffman code for file: own_corpus.txt...\n",
"First 3: r e s\n",
"Binary: 0100 001 0001\n",
"Calculating Huffman code for file: random_text_uniform_distribution.txt...\n",
"First 3: H W 8\n",
"Binary: 111010 001011 110101\n",
"Calculating Huffman code for file: random_text_geometric_distribution.txt...\n",
"First 3: b a a\n",
"Binary: 01 11 11\n",
"Calculating Huffman code for file: random_text_uniform_two_point_05_distribution.txt...\n",
"First 3: 0 0 0\n",
"Binary: 01 01 01\n",
"Calculating Huffman code for file: random_text_uniform_two_point_09_distribution.txt...\n",
"First 3: 1 1 1\n",
"Binary: 1 1 1\n"
]
}
],
"source": [
"from dahuffman import HuffmanCodec\n",
"\n",
"def encode_and_print(text):\n",
" codec = HuffmanCodec.from_data(text)\n",
" encoded = codec.encode(text)\n",
" table = codec.get_code_table()\n",
" table_str = str(table)\n",
"\n",
" first_3_letters = first_n_decoded_digits(encoded, codec, 3)\n",
" print(\"First 3:\", end=' ')\n",
" print(' '.join(first_3_letters))\n",
" print(\"Binary: \", end=' ')\n",
" print(' '.join(number_to_bin(table[letter][1], table[letter][0]) for letter in first_3_letters))\n",
" \n",
" return encoded, table_str\n",
"\n",
"def first_n_decoded_digits(encoded, codec, n):\n",
" decoded = codec.decode(encoded)\n",
" return decoded[:n]\n",
"\n",
"def save_to_bin(bytes, file_name):\n",
" with open(file_name, 'wb') as f:\n",
" f.write(bytes)\n",
"\n",
"def number_to_bin(number, nbits):\n",
" return bin(number)[2:].zfill(nbits)\n",
"\n",
"for file in file_names:\n",
" print(f\"Calculating Huffman code for file: {file}...\")\n",
" encoded, code_table = encode_and_print(open(file, 'r').read())\n",
" save_to_bin(encoded, file.replace('.txt', '.bin'))\n",
" save_to_bin(code_table.encode(), file.replace('.txt', '_codetable.bin'))\n",
"\n",
"# Nie do końca rozumiem jak mam zapisać ten codec."
]
},
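{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"One way to persist the codec (a minimal sketch using the standard pickle module; dahuffman may also ship its own save/load helpers, which is worth checking in its documentation):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"from dahuffman import HuffmanCodec\n",
"\n",
"# Build a codec and round-trip it through a pickle file\n",
"codec = HuffmanCodec.from_data(open('own_corpus.txt', 'r').read())\n",
"with open('own_corpus_codec.pickle', 'wb') as f:\n",
"    pickle.dump(codec, f)\n",
"with open('own_corpus_codec.pickle', 'rb') as f:\n",
"    restored = pickle.load(f)\n",
"print(restored.decode(restored.encode('test')))"
]
},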
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Zadanie 2"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"import regex as re\n",
"from collections import Counter\n",
"from math import log\n",
"\n",
"def get_words(t):\n",
" for m in re.finditer(r'[\\p{L}0-9\\*]+', t):\n",
" yield m.group(0)\n",
"\n",
"def unigram_entropy(t):\n",
" counter = Counter(t)\n",
" total = sum(counter.values())\n",
" return -sum((p := count / total) * log(p, 2) for count in counter.values())"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.27320212652544\n"
]
}
],
"source": [
"file_content = \"\"\n",
"with open(\"own_corpus.txt\", 'r') as file:\n",
" file_content = file.read()\n",
"\n",
"words = list(get_words(file_content))\n",
"print(unigram_entropy(words))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Zadanie 3"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"![title](DrzewoHuffmana.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ai_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.15"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

BIN
Lab3/own_corpus.bin Normal file

Binary file not shown.

BIN
Lab3/own_corpus.tar.gz Normal file

Binary file not shown.

1978
Lab3/own_corpus.txt Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{'u': (5, 0), 'k': (8, 8), 'x': (8, 9), '2': (7, 5), 'q': (9, 24), '8': (9, 25), '5': (9, 26), '-': (9, 27), '1': (7, 7), 's': (4, 1), 'e': (3, 1), 'r': (4, 4), '0': (7, 40), '6': (9, 164), _EOF: (12, 1320), '_': (12, 1321), 'z': (11, 661), 'j': (10, 331), '7': (9, 166), '4': (9, 167), 'w': (7, 42), 'v': (7, 43), 'd': (5, 11), 'h': (5, 12), 'g': (6, 26), 'y': (7, 54), '9': (9, 220), '\n': (9, 221), '(': (8, 111), 'n': (4, 7), 'o': (4, 8), 'a': (4, 9), 'l': (5, 20), 'c': (5, 21), 'i': (4, 11), 't': (4, 12), ')': (8, 208), '3': (9, 418), '/': (9, 419), 'b': (7, 105), 'm': (6, 53), 'f': (6, 54), 'p': (6, 55), ' ': (3, 7)}
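Each entry above maps a symbol to a (bit length, code value) pair, matching the format returned by dahuffman's get_code_table(). A minimal sketch (the excerpt is hard-coded from the table above) that renders those pairs as bit strings:

```python
# Excerpt from the code table above: symbol -> (number of bits, code value)
code_table = {'u': (5, 0), 'e': (3, 1), 'r': (4, 4), 's': (4, 1), ' ': (3, 7)}

for symbol, (nbits, value) in code_table.items():
    print(repr(symbol), format(value, f'0{nbits}b'))
# 'r' -> 0100, 'e' -> 001, 's' -> 0001, which matches the notebook output
# "First 3: r e s / Binary: 0100 001 0001" for own_corpus.txt
```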

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{'p': (9, 0), 'u': (11, 4), 'E': (15, 80), _EOF: (16, 162), 'D': (16, 163), 'C': (15, 82), 'A': (15, 83), 'w': (13, 21), 'v': (12, 11), 'r': (10, 3), 'n': (8, 1), 'l': (7, 1), 'j': (6, 1), 'h': (5, 1), 'f': (4, 1), 'd': (3, 1), 'b': (2, 1), 'o': (9, 256), 'q': (10, 514), 'x': (13, 4120), 'y': (14, 8242), 'z': (14, 8243), 't': (12, 2061), 's': (11, 1031), 'm': (8, 129), 'k': (7, 65), 'i': (6, 33), 'g': (5, 17), 'e': (4, 9), 'c': (3, 5), 'a': (2, 3)}

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{'A': (5, 0), 'b': (5, 1), _EOF: (7, 8), 'O': (7, 9), 'Y': (6, 5), '9': (6, 6), 't': (6, 7), '1': (6, 8), 'X': (6, 9), 'e': (6, 10), 'W': (6, 11), '4': (6, 12), '3': (6, 13), 'o': (6, 14), 'q': (6, 15), 'T': (6, 16), 'l': (6, 17), 'J': (6, 18), 'y': (6, 19), '6': (6, 20), 'F': (6, 21), 'G': (6, 22), 'Q': (6, 23), 'K': (6, 24), 'N': (6, 25), 'S': (6, 26), 'f': (6, 27), '5': (6, 28), 'L': (6, 29), 'd': (6, 30), 'D': (6, 31), 'M': (6, 32), 'n': (6, 33), 'u': (6, 34), 'B': (6, 35), '2': (6, 36), 'a': (6, 37), '0': (6, 38), '7': (6, 39), 'P': (6, 40), 'E': (6, 41), 'j': (6, 42), 'z': (6, 43), 'C': (6, 44), 'h': (6, 45), 'i': (6, 46), 'c': (6, 47), 'm': (6, 48), 'R': (6, 49), 'k': (6, 50), 'I': (6, 51), 'U': (6, 52), '8': (6, 53), 'Z': (6, 54), 'g': (6, 55), 's': (6, 56), 'V': (6, 57), 'H': (6, 58), 'w': (6, 59), 'r': (6, 60), 'x': (6, 61), 'p': (6, 62), 'v': (6, 63)}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{_EOF: (2, 0), '0': (2, 1), '1': (1, 1)}
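This two-point code gives '1' a 1-bit code and '0' a 2-bit code (plus a rare 2-bit _EOF). A quick sketch cross-checking the implied file sizes against the Huffman sizes reported in the notebook (symbol probabilities taken from the generation cells):

```python
# Expected Huffman-coded size for 1,000,000 binary symbols when
# '1' (probability p_one) gets 1 bit and '0' gets 2 bits.
for p_one in (0.5, 0.9):
    avg_bits = p_one * 1 + (1 - p_one) * 2
    print(p_one, avg_bits * 1_000_000 / 8, "bytes expected")
# ~187500 and ~137500 bytes: close to the 187473 and 137531 bytes
# reported above for the two two-point files (codetable included).
```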

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{_EOF: (2, 0), '0': (2, 1), '1': (1, 1)}