forked from bfijalkowski/KWT-2024
finished
This commit is contained in:
parent
0c0ef36a89
commit
6040e09fa8
29
lab/java/sample_class.java
Normal file
29
lab/java/sample_class.java
Normal file
@ -0,0 +1,29 @@
|
||||
/**
|
||||
* This is a simple Java class demonstrating grammatical errors in comments.
|
||||
*/
|
||||
public class SampleClass {
|
||||
|
||||
// This is a incorrect comment
|
||||
private int sampleField;
|
||||
|
||||
/**
|
||||
* This method does something very simple.
|
||||
* @param value This is a parameter that take a integer value.
|
||||
*/
|
||||
public void doSomething(int value) {
|
||||
// This is a method that does something
|
||||
sampleField = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the value of sample field.
|
||||
* It should be a integer value.
|
||||
* @return the value of sampleField
|
||||
*/
|
||||
public int getSampleField() {
|
||||
// Return the value of the sample field
|
||||
return sampleField;
|
||||
}
|
||||
|
||||
// Another bad comment here
|
||||
}
|
295
lab/lab_15.ipynb
295
lab/lab_15.ipynb
@ -79,16 +79,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"id": "relative-anaheim",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading LanguageTool 6.4: 100%|██████████| 246M/246M [00:03<00:00, 66.2MB/s] \n",
|
||||
"Unzipping /var/folders/x3/mzn7dtf55q5355g3c5003g_h0000gn/T/tmp5m6kri4j.zip to /Users/adamstelmaszyk/.cache/language_tool_python.\n",
|
||||
"Downloaded https://www.languagetool.org/download/LanguageTool-6.4.zip to /Users/adamstelmaszyk/.cache/language_tool_python.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Match({'ruleId': 'EN_A_VS_AN', 'message': 'Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’.', 'replacements': ['an'], 'offsetInContext': 16, 'context': 'A sentence with a error in the Hitchhiker’s Guide tot he ...', 'offset': 16, 'errorLength': 1, 'category': 'MISC', 'ruleIssueType': 'misspelling', 'sentence': 'A sentence with a error in the Hitchhiker’s Guide tot he Galaxy'}),\n",
|
||||
" Match({'ruleId': 'TOT_HE', 'message': 'Did you mean “to the”?', 'replacements': ['to the'], 'offsetInContext': 43, 'context': '... with a error in the Hitchhiker’s Guide tot he Galaxy', 'offset': 50, 'errorLength': 6, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': 'A sentence with a error in the Hitchhiker’s Guide tot he Galaxy'})]\n"
|
||||
"[Match({'ruleId': 'EN_A_VS_AN', 'message': 'Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’.', 'replacements': ['an'], 'offsetInContext': 16, 'context': 'A sentence with a error in the Hitchhiker’s Guide tot he ...', 'offset': 16, 'errorLength': 1, 'category': 'MISC', 'ruleIssueType': 'misspelling', 'sentence': \"A sentence with a error in the Hitchhiker's Guide tot he Galaxy\"}),\n",
|
||||
" Match({'ruleId': 'TOT_HE', 'message': 'Did you mean “to the”?', 'replacements': ['to the'], 'offsetInContext': 43, 'context': '... with a error in the Hitchhiker’s Guide tot he Galaxy', 'offset': 50, 'errorLength': 6, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': \"A sentence with a error in the Hitchhiker's Guide tot he Galaxy\"})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -122,13 +131,77 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "sound-teaching",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
" Specjalistk...rce znajdziesz\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
" Specjalistka ds. mediów społecznośc...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...absolwentka Wydziału Anglistyki. UAM na Pyrkonie 2024. Zapraszamy do obejrzenia fotorela...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...topnia oraz jednolite magisterskie. . . Scientific Advisory Board UAM obradowało po raz ko...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... jednolite magisterskie. . . Scientific Advisory Board UAM obradowało po raz kolejny. Po...\n",
|
||||
"\n",
|
||||
"Error: Nietypowa kombinacja małych i dużych liter. Czy nie powinno być: \"beststudentcamp\"?\n",
|
||||
"Context: ... laureatów konkursu Szkoła Letnia UAM – BESTStudentCAMP 2024. Poznaj listę nagrodzonych! Stworz...\n",
|
||||
"\n",
|
||||
"Error: Spacje wokół dywizu (w przeciwieństwie do myślnika) są zbędne: \"UAM-Pyrkon\"; jeśli to miał być myślnik, to należy napisać \"UAM — Pyrkon\".\n",
|
||||
"Context: ...wszy etap wdrażania planu. Fantastyczny UAM - Pyrkon 2024 „Człowiek w rzeczywistości AI: roz...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...rs recytatorski wierszy Czesława Miłosza . Uniwe...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...a i nauczanie wzajemnie się przenikają. . Jedna z inicjatyw UAM, w ramach której...\n",
|
||||
"\n",
|
||||
"Error: Nie wstawiamy spacji przed kropką\n",
|
||||
"Context: ...zej uczelni Zobacz nasze ciekawe obiekty . Uniwer...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...asopismach ogólnopolskich i światowych. . Na UA...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...tecie im. Adama Mickiewicza w Poznaniu. . Na UA...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...a także ogłoszone zamówienia publiczne. . Uniwersytet im. Adama Mickiewicza w Po...\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from language_tool_python import LanguageTool\n",
|
||||
"\n",
|
||||
"def find_errors(website_url):\n",
|
||||
" return []"
|
||||
" tool = LanguageTool('pl-PL')\n",
|
||||
"\n",
|
||||
" response = requests.get(website_url)\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
"\n",
|
||||
" text = ' '.join([p.get_text() for p in soup.find_all('p')])\n",
|
||||
"\n",
|
||||
" matches = tool.check(text)\n",
|
||||
" \n",
|
||||
" errors = [match for match in matches if 'Prawidłowa' not in match.ruleIssueType]\n",
|
||||
" \n",
|
||||
" return errors\n",
|
||||
"\n",
|
||||
"website_url = 'https://amu.edu.pl' \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -141,13 +214,215 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "settled-armor",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a incorrect comment\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a incorrect comment\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a incorrect comment\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // This is a method that does something\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Return the value of the sample field\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Return the value of the sample field\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Return the value of the sample field\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Return the value of the sample field\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Another bad comment here\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: // Another bad comment here\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This is a simple Java class demonstrating gr...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This is a simple Java class demonstrating gramm...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This is a simple Java class demonstrating grammatical er...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This is a simple Java class demonstrating grammatical errors in com...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This is a simple Java class demonstrating grammatical errors in comments. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...is is a simple Java class demonstrating grammatical errors in comments. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...le Java class demonstrating grammatical errors in comments. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...ass demonstrating grammatical errors in comments. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method does something very simple. ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method does something very simple. * @par...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method does something very simple. * @param va...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method does something very simple. * @param value This i...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method does something very simple. * @param value This is a p...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... * This method does something very simple. * @param value This is a paramete...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...This method does something very simple. * @param value This is a parameter that ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...thod does something very simple. * @param value This is a parameter that take a i...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...es something very simple. * @param value This is a parameter that take a integer...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...ething very simple. * @param value This is a parameter that take a integer valu...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...g very simple. * @param value This is a parameter that take a integer value. ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...y simple. * @param value This is a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... * @param value This is a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...* @param value This is a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...m value This is a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...This is a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ... a parameter that take a integer value. */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method returns the value of sample fiel...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method returns the value of sample field. ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method returns the value of sample field. * It sh...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method returns the value of sample field. * It should...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: /** * This method returns the value of sample field. * It should be a ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...** * This method returns the value of sample field. * It should be a int...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...thod returns the value of sample field. * It should be a integer value. * @...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...turns the value of sample field. * It should be a integer value. * @retu...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...ns the value of sample field. * It should be a integer value. * @return the ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... of sample field. * It should be a integer value. * @return the value of samp...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...le field. * It should be a integer value. * @return the value of sampleFiel...\n",
|
||||
"\n",
|
||||
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
|
||||
"Context: ...d. * It should be a integer value. * @return the value of sampleField ...\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... * It should be a integer value. * @return the value of sampleField */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...ould be a integer value. * @return the value of sampleField */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ... be a integer value. * @return the value of sampleField */\n",
|
||||
"\n",
|
||||
"Error: Wykryto prawdopodobny błąd pisowni\n",
|
||||
"Context: ...integer value. * @return the value of sampleField */\n",
|
||||
"\n",
|
||||
"Error: Nietypowa kombinacja małych i dużych liter. Czy nie powinno być: \"samplefield\"?\n",
|
||||
"Context: ...eger value. * @return the value of sampleField */\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from language_tool_python import LanguageTool\n",
|
||||
"\n",
|
||||
"def correct_java_grammar(java_file_path):\n",
|
||||
" return []"
|
||||
" tool = LanguageTool('pl-PL')\n",
|
||||
"\n",
|
||||
" with open(java_file_path, 'r', encoding='utf-8') as file:\n",
|
||||
" java_code = file.read()\n",
|
||||
"\n",
|
||||
" single_line_comments = re.findall(r'//.*', java_code)\n",
|
||||
" javadocs = re.findall(r'/\\*\\*.*?\\*/', java_code, re.DOTALL)\n",
|
||||
" \n",
|
||||
" all_comments = single_line_comments + javadocs\n",
|
||||
"\n",
|
||||
" errors = []\n",
|
||||
" for comment in all_comments:\n",
|
||||
" matches = tool.check(comment)\n",
|
||||
" errors.extend(matches)\n",
|
||||
"\n",
|
||||
" return errors\n",
|
||||
"\n",
|
||||
"java_file_path = 'java/sample_class.java'\n",
|
||||
"errors = correct_java_grammar(java_file_path)\n",
|
||||
"\n",
|
||||
"for error in errors:\n",
|
||||
" print(f\"Error: {error.message}\\nContext: {error.context}\\n\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -170,7 +445,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.11.9"
|
||||
},
|
||||
"subtitle": "15. Korekta gramatyczna",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user