forked from bfijalkowski/KWT-2024
lab 11-14
This commit is contained in:
parent
824f7d373d
commit
a3dca39152
@ -52,13 +52,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "german-dispute",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
def sentence_split(text):
    """Split ``text`` into sentence-like segments.

    A boundary is either a period followed by one space and an uppercase
    letter (Unicode category Lu), or a newline character.

    Args:
        text: The string to segment.

    Returns:
        list[str]: The segments in order of appearance, with surrounding
        periods and whitespace removed. Consecutive boundaries (for example
        blank lines) produce empty strings.
    """
    def purge(text_l: str) -> str:
        # Alternate the two strips until nothing changes. A single
        # strip('.').strip() pass is order-dependent: "Foo. " keeps its
        # period because the trailing space shields it from strip('.').
        previous = None
        while previous != text_l:
            previous = text_l
            text_l = text_l.strip().strip('.')
        return text_l

    # NOTE: relies on the third-party `regex` module (needed for \p{Lu})
    # being imported elsewhere in the notebook.
    index = 0
    result = []
    for match in regex.finditer(r'\. \p{Lu}|\n', text):
        result.append(purge(text[index:match.start(0)]))
        # Restart at the delimiter itself so the uppercase letter captured by
        # the pattern stays in the next segment; purge() drops the leading
        # period or newline.
        index = match.start(0)
    result.append(purge(text[index:]))

    return result
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -69,6 +78,14 @@
|
||||
"### Ćwiczenie 2: Uruchom powyższy algorytm na treści wybranej przez siebie strony internetowej (do ściągnięcia treści strony wykorzystaj kod z laboratoriów nr 7). Zidentyfikuj co najmniej dwa wyjątki od ogólnej reguły podziału na segmenty i ulepsz algorytm."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20bc0bf7-35b7-44e5-8750-c22e6de9d048",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Dwa wyjątki to zdania zakończone wykrzyknikiem i zdania zakończone znakiem zapytania."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
@ -76,8 +93,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def sentence_split_enhanced(text):\n",
|
||||
" return []"
|
# NOTE(review): this cell reuses the name `sentence_split`, so running it
# replaces the exercise 1 definition; the exercise stub it supersedes was
# called `sentence_split_enhanced` - confirm which name callers expect.
def sentence_split(text):
    """Split ``text`` into sentences ending with ``.``, ``?`` or ``!``.

    A boundary is either one of ``.``, ``?``, ``!`` followed by one space and
    an uppercase letter (Unicode category Lu), or a newline character.

    Args:
        text: The string to segment.

    Returns:
        list[str]: The segments in order of appearance, with surrounding
        sentence punctuation and whitespace removed. Consecutive boundaries
        (for example blank lines) produce empty strings.
    """
    def purge(text_l: str) -> str:
        # Strip the whole punctuation set at once and repeat until stable.
        # Chaining strip('.').strip('?').strip('!') is order-dependent and
        # leaves residue such as "Really?" for the input "Really?!".
        previous = None
        while previous != text_l:
            previous = text_l
            text_l = text_l.strip().strip('.?!')
        return text_l

    # NOTE: relies on the third-party `regex` module (needed for \p{Lu})
    # being imported elsewhere in the notebook.
    index = 0
    result = []
    for match in regex.finditer(r'(\.|\?|\!) \p{Lu}|\n', text):
        result.append(purge(text[index:match.start(0)]))
        # Restart at the delimiter itself so the uppercase letter captured by
        # the pattern stays in the next segment; purge() drops the leading
        # punctuation or newline.
        index = match.start(0)
    result.append(purge(text[index:]))

    return result
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -117,6 +143,14 @@
|
||||
"Wyjściem z Hunaligna jest plik w specjalnym formacie Hunaligna. Problem jednak w tym, że niestety nie można go w prosty sposób zaimportować do jakiegokolwiek narzędzia typu CAT. Potrzebna jest konwersja do któregoś z bardziej popularnych formatów, np. XLIFF."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "80360005-5110-4f83-bfd6-dbe22a1d5b5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## *Linki do pobrania tego programu (ftp://ftp.mokk.bme.hu/Hunglish/src/hunalign/latest/hunalign-1.1-windows.zip), dostępne w README na GitHubie, nie działają.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "divided-chain",
|
||||
@ -187,15 +221,12 @@
|
||||
"metadata": {
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"lang": "pl",
|
||||
"subtitle": "11. Urównoleglanie",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
"year": "2021",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"lang": "pl",
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
@ -206,8 +237,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"subtitle": "11. Urównoleglanie",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
"year": "2021"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
|
109
lab/lab_12.ipynb
109
lab/lab_12.ipynb
@ -96,6 +96,26 @@
|
||||
"### Ćwiczenie 1: Wykorzystując powyższy kod napisz keylogger, który zapisuje wszystkie uderzenia w klawisze do pliku. Format pliku jest dowolny, każdy wpis musi zawierać precyzyjną godzinę uderzenia oraz uderzony klawisz. Uruchom program i przepisz paragraf dowolnie wybranego tekstu."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8663ef15-88a0-4bb5-aff9-f19cbb3178c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import keyboard\n",
|
||||
"\n",
|
||||
"\n",
|
def report_key(event: keyboard.KeyboardEvent):
    """Append one keyboard event to ``test.txt``.

    Each call writes a single line of the form ``[<event.time>] <event.name>``.

    Args:
        event: The keyboard event to record; its ``time`` and ``name``
            attributes are written to the log.
    """
    # The context manager closes the handle even if the write raises,
    # which the manual open()/close() pair did not guarantee.
    with open('test.txt', 'a') as log_file:
        log_file.write(f'[{event.time}] {event.name}\n')


# Register the logger for key-release events, then wait so the hook keeps
# receiving events (keyboard.wait() with no arguments is documented to block).
keyboard.on_release(callback=report_key)
keyboard.wait()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "valuable-bearing",
|
||||
@ -120,7 +140,40 @@
|
||||
"outputs": [],
|
||||
"source": [
|
def calculate_typing_speed(path='test.txt'):
    """Estimate typing speed from a key log.

    Every log line is expected to look like ``[<timestamp>] <key name>`` with
    the timestamp in seconds. Lines that do not match are skipped.

    The interval between two consecutive keystrokes counts as the time of one
    character. A word's time is the sum of the intervals of its keystrokes,
    excluding the separator (``space``, ``enter``, ``,`` or ``.``) that ends
    it. A gap of more than 5 seconds is treated as a break: it is not counted
    and the word in progress is discarded.

    Args:
        path: Location of the key log. Defaults to ``'test.txt'``.

    Returns:
        tuple[float, float]: ``(characters_per_minute, words_per_minute)``.
        A component is ``0.0`` when no usable interval was recorded for it.
    """
    import re

    entry_pattern = re.compile(r'\[([^\]]+)\]\s*(\S.*)')
    word_separators = {'space', 'enter', ',', '.'}
    max_gap = 5  # seconds of silence after which timing restarts

    def parse(line_l: str):
        # Returns (timestamp, key) or None for a line that is not a log entry.
        found = entry_pattern.match(line_l.strip())
        if found is None:
            return None
        try:
            stamp = float(found.group(1))
        except ValueError:
            return None
        # Whitespace inside a key name is dropped, e.g. "page up" -> "pageup".
        return stamp, ''.join(found.group(2).split())

    def per_minute(samples):
        # 60 / mean(samples), guarded against empty or all-zero samples.
        total = sum(samples)
        return 60 * len(samples) / total if total > 0 else 0.0

    time_per_character = []
    time_per_word = []
    local_time_per_word = []
    prev_char_timestamp = None

    with open(path, 'r') as log_file:
        for line in log_file:
            entry = parse(line)
            if entry is None:
                continue
            time, key = entry
            if prev_char_timestamp is None or time - prev_char_timestamp > max_gap:
                prev_char_timestamp = time
                local_time_per_word = []
                continue
            elapsed = time - prev_char_timestamp
            prev_char_timestamp = time
            # Every keystroke, separators included, counts exactly once.
            time_per_character.append(elapsed)
            if key in word_separators:
                if local_time_per_word:
                    time_per_word.append(sum(local_time_per_word))
                    local_time_per_word = []
            else:
                local_time_per_word.append(elapsed)

    # Flush the last word only if one is in progress; an empty tail must not
    # be recorded as a zero-length word.
    if local_time_per_word:
        time_per_word.append(sum(local_time_per_word))

    return per_minute(time_per_character), per_minute(time_per_word)
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -147,22 +200,57 @@
|
||||
"outputs": [],
|
||||
"source": [
|
def find_pauses(path='test.txt', stop_reporting_time=1, context_size=20):
    """Find pauses between keystrokes in a key log, longest first.

    Every log line is expected to look like ``[<timestamp>] <key name>`` with
    the timestamp in seconds. Lines that do not match are skipped.

    Args:
        path: Location of the key log. Defaults to ``'test.txt'``.
        stop_reporting_time: A gap between consecutive keystrokes is reported
            only when it is strictly greater than this many seconds.
        context_size: Maximum number of keystrokes collected on each side of
            a pause.

    Returns:
        list[tuple[float, tuple[str, str]]]: One
        ``(elapsed, (context_before, context_after))`` record per pause,
        sorted by ``elapsed`` in descending order. ``context_before`` joins
        the key names typed before the pause; ``context_after`` joins the key
        names starting with the keystroke that ended the pause.
    """
    import re

    entry_pattern = re.compile(r'\[([^\]]+)\]\s*(\S.*)')

    def parse(line_l: str):
        # Returns (timestamp, key) or None for a line that is not a log entry.
        found = entry_pattern.match(line_l.strip())
        if found is None:
            return None
        try:
            stamp = float(found.group(1))
        except ValueError:
            return None
        # Whitespace inside a key name is dropped, e.g. "page up" -> "pageup".
        return stamp, ''.join(found.group(2).split())

    # Parse each line once up front instead of re-parsing the context window
    # for every pause.
    with open(path, 'r') as log_file:
        events = [entry for entry in map(parse, log_file) if entry is not None]
    keys = [key for _, key in events]

    stops = []
    for i in range(1, len(events)):
        elapsed = events[i][0] - events[i - 1][0]
        if elapsed > stop_reporting_time:
            context_before = ''.join(keys[max(0, i - context_size):i])
            context_after = ''.join(keys[i:i + context_size])
            stops.append((elapsed, (context_before, context_after)))

    # list.sort is stable, so equally long pauses keep their log order.
    stops.sort(key=lambda record: record[0], reverse=True)

    return stops
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"lang": "pl",
|
||||
"subtitle": "12. Key logging",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
"year": "2021",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"lang": "pl",
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
@ -173,8 +261,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"subtitle": "12. Key logging",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
"year": "2021"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
|
@ -201,7 +201,7 @@
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -216,7 +216,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"subtitle": "13,14. Korekta pisowni",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user