def sentence_split(text):
    """Split ``text`` into sentences and return them as a list of strings.

    A sentence boundary is recognised at every newline and at every ``.``,
    ``?`` or ``!`` that is followed by exactly one space and an uppercase
    letter (Unicode category ``Lu``, so capitals such as ``Ł`` qualify).
    Terminal punctuation and surrounding whitespace are removed from every
    sentence.

    Args:
        text: The text to segment.

    Returns:
        The non-empty sentences in their original order; an empty list for
        empty or whitespace-only input.
    """
    # Local imports keep the cell runnable on a fresh kernel. Only the
    # standard library is needed: ``unicodedata`` stands in for the \p{Lu}
    # class that the built-in ``re`` module does not support.
    import re
    import unicodedata

    def purge(segment):
        # Remove whitespace and terminators in a single pass so the outcome
        # does not depend on their order ("Really?!" -> "Really").
        return re.sub(r'^[\s.?!]+|[\s.?!]+\Z', '', segment)

    def is_boundary(match):
        if match.group(0) == '\n':
            return True
        # Punctuation + space only ends a sentence before an uppercase letter.
        following = text[match.end():match.end() + 1]
        return following != '' and unicodedata.category(following) == 'Lu'

    segments = []
    start = 0
    for match in re.finditer(r'[.?!] |\n', text):
        if not is_boundary(match):
            continue
        segments.append(purge(text[start:match.start()]))
        # The next segment begins at the terminator itself; purge() drops it.
        start = match.start()
    segments.append(purge(text[start:]))

    # Blank lines and trailing newlines produce empty segments - drop them.
    return [segment for segment in segments if segment]
Potrzebna jest konwersja do któregoś z bardziej popularnych formatów, np. XLIFF." ] }, + { + "cell_type": "markdown", + "id": "80360005-5110-4f83-bfd6-dbe22a1d5b5b", + "metadata": {}, + "source": [ + "## *Linki do pobrania tego progamu(ftp://ftp.mokk.bme.hu/Hunglish/src/hunalign/latest/hunalign-1.1-windows.zip), dostępne w README na githubie, nie działają.*" + ] + }, { "cell_type": "markdown", "id": "divided-chain", @@ -187,15 +221,12 @@ "metadata": { "author": "Rafał Jaworski", "email": "rjawor@amu.edu.pl", - "lang": "pl", - "subtitle": "11. Urównoleglanie", - "title": "Komputerowe wspomaganie tłumaczenia", - "year": "2021", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, + "lang": "pl", "language_info": { "codemirror_mode": { "name": "ipython", @@ -206,8 +237,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" - } + "version": "3.10.4" + }, + "subtitle": "11. Urównoleglanie", + "title": "Komputerowe wspomaganie tłumaczenia", + "year": "2021" }, "nbformat": 4, "nbformat_minor": 5 diff --git a/lab/lab_12.ipynb b/lab/lab_12.ipynb index c4dc223..597f201 100644 --- a/lab/lab_12.ipynb +++ b/lab/lab_12.ipynb @@ -96,6 +96,26 @@ "### Ćwiczenie 1: Wykorzystując powyższy kod napisz keylogger, który zapisuje wszystkie uderzenia w klawisze do pliku. Format pliku jest dowolny, każdy wpis musi zawierać precyzyjną godzinę uderzenia oraz uderzony klawisz. Uruchom program i przepisz paragraf dowolnie wybranego tekstu." 
def calculate_typing_speed(path='test.txt', max_gap=5):
    """Estimate typing speed from a key log.

    The log holds one ``[<timestamp>] <key name>`` entry per line, with the
    timestamp in seconds. Intervals between consecutive keystrokes are
    averaged to obtain characters per minute. Runs of keystrokes delimited by
    ``space``, ``enter``, ``,`` or ``.`` are treated as words and their
    durations are averaged to obtain words per minute.

    Args:
        path: Location of the key log file.
        max_gap: Breaks longer than this many seconds are treated as a pause
            in typing: the interval is ignored and the word in progress is
            discarded.

    Returns:
        A ``(characters_per_minute, words_per_minute)`` tuple of floats.
        A component is ``0.0`` when the log has no data to compute it from.

    Raises:
        ValueError: If a non-blank line does not follow the log format.
    """
    import re

    entry_pattern = re.compile(r'\[([^\]]+)\]\s*(.*)')
    separators = {'space', 'enter', ',', '.'}

    def parse(line_l):
        # Anchor on the brackets so any key name (digits, symbols, multi-word
        # names) is accepted, not only letters, commas and dots.
        match = entry_pattern.fullmatch(line_l.strip())
        if match is None:
            raise ValueError(f'malformed key log line: {line_l!r}')
        return float(match.group(1)), match.group(2)

    def per_minute(samples):
        # 60 / mean(samples), guarded against empty or all-zero samples.
        total = sum(samples)
        return 60 * len(samples) / total if total > 0 else 0.0

    char_intervals = []
    word_durations = []
    current_word = []
    previous = None

    with open(path, 'r') as log:
        for line in log:
            if not line.strip():
                continue
            timestamp, key = parse(line)
            if previous is None or timestamp - previous > max_gap:
                # First keystroke or a long break: restart the clock.
                previous = timestamp
                current_word = []
                continue
            elapsed = timestamp - previous
            previous = timestamp
            # Every keystroke, separator or not, counts exactly once.
            char_intervals.append(elapsed)
            if key in separators:
                if current_word:
                    word_durations.append(sum(current_word))
                    current_word = []
            else:
                current_word.append(elapsed)

    # Flush the last word only if one is actually in progress; a log ending
    # on a separator must not contribute a zero-length word.
    if current_word:
        word_durations.append(sum(current_word))

    return per_minute(char_intervals), per_minute(word_durations)
def find_pauses(path='test.txt', threshold=1, context=20):
    """Find the longest pauses between keystrokes in a key log.

    The log holds one ``[<timestamp>] <key name>`` entry per line, with the
    timestamp in seconds. A pause is any interval between two consecutive
    keystrokes that exceeds ``threshold``.

    Args:
        path: Location of the key log file.
        threshold: Minimum interval, in seconds, that must be exceeded for an
            interval to be reported.
        context: Maximum number of key names collected on each side of the
            pause.

    Returns:
        A list of ``(elapsed, (context_before, context_after))`` tuples sorted
        by ``elapsed`` in descending order. ``context_before`` concatenates
        the key names preceding the pause; ``context_after`` concatenates the
        key names from the keystroke that ended the pause onwards.

    Raises:
        ValueError: If a non-blank line does not follow the log format.
    """
    import re

    entry_pattern = re.compile(r'\[([^\]]+)\]\s*(.*)')

    def parse(line_l):
        # Anchor on the brackets so any key name (digits, symbols, multi-word
        # names) is accepted, not only letters, commas and dots.
        match = entry_pattern.fullmatch(line_l.strip())
        if match is None:
            raise ValueError(f'malformed key log line: {line_l!r}')
        return float(match.group(1)), match.group(2)

    # Parse every entry once instead of re-parsing the context lines for each
    # pause that is found.
    with open(path, 'r') as log:
        entries = [parse(line) for line in log if line.strip()]
    keys = [key for _, key in entries]

    stops = []
    for i in range(1, len(entries)):
        elapsed = entries[i][0] - entries[i - 1][0]
        if elapsed > threshold:
            context_before = ''.join(keys[max(0, i - context):i])
            context_after = ''.join(keys[i:i + context])
            stops.append((elapsed, (context_before, context_after)))

    # Longest pause first; the sort is stable, so ties keep their log order.
    stops.sort(key=lambda record: record[0], reverse=True)
    return stops
Key logging", + "title": "Komputerowe wspomaganie tłumaczenia", + "year": "2021" }, "nbformat": 4, "nbformat_minor": 5 diff --git a/lab/lab_13-14.ipynb b/lab/lab_13-14.ipynb index 740d7d9..664a1e5 100644 --- a/lab/lab_13-14.ipynb +++ b/lab/lab_13-14.ipynb @@ -201,7 +201,7 @@ "author": "Rafał Jaworski", "email": "rjawor@amu.edu.pl", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -216,7 +216,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.4" }, "subtitle": "13,14. Korekta pisowni", "title": "Komputerowe wspomaganie tłumaczenia",