"""Repair BIO label sequences whose entities start with an ``I-`` tag.

Each input line is a space-separated sequence of labels such as
``O B-PER I-PER O``.  An ``I-<TYPE>`` label that does not continue an entity
of the same type (i.e. is not preceded by ``B-<TYPE>`` or ``I-<TYPE>``) is
rewritten to ``B-<TYPE>``.
"""

DEFAULT_ENTITY_TYPES = ("ORG", "PER", "LOC", "MISC")


def _correct_line(line, entity_types):
    """Return *line* with every dangling ``I-<TYPE>`` label turned into ``B-<TYPE>``."""
    corrected_tokens = []
    # A line start behaves like an "outside" label: nothing can be continued.
    previous_token = "O"

    for token in line.split(" "):
        prefix, _, entity = token.partition("-")
        if (
            prefix == "I"
            and entity in entity_types
            and previous_token not in (f"B-{entity}", f"I-{entity}")
        ):
            corrected_tokens.append(f"B-{entity}")
        else:
            corrected_tokens.append(token)

        # Track the ORIGINAL label.  A correction only swaps I- for B- of the
        # same type, so the continuation check for the next token is the same
        # whichever of the two is remembered.
        previous_token = token

    return " ".join(corrected_tokens)


def correct_labels(input_file, output_file, entity_types=DEFAULT_ENTITY_TYPES):
    """Read label lines from *input_file* and write the repaired lines to *output_file*.

    The files are processed as plain UTF-8 text, one label sequence per line,
    so the output always has exactly one line per input line.  Blank lines
    are kept as blank lines, and no CSV quoting, column splitting or
    NA/number inference is applied to the content.

    Args:
        input_file: Path of the file holding one space-separated label
            sequence per line.
        output_file: Path the corrected sequences are written to; every
            output line is terminated with a newline.
        entity_types: Entity type names (the part after ``I-``/``B-``) whose
            dangling ``I-`` labels are repaired.  Labels of any other type
            are copied through unchanged.
    """
    known_types = frozenset(entity_types)

    with open(input_file, encoding="utf-8") as source:
        corrected_lines = [
            _correct_line(raw_line.rstrip("\n"), known_types) for raw_line in source
        ]

    with open(output_file, "w", encoding="utf-8") as target:
        target.writelines(f"{line}\n" for line in corrected_lines)


if __name__ == "__main__":
    input_file = "test-A/out.tsv"
    output_file = "out-A.tsv"
    correct_labels(input_file, output_file)