import re
import pandas as pd
from transformers import pipeline

def _repair_labels(labels, entity_types):
    """Return *labels* with orphaned ``I-<type>`` tags promoted to ``B-<type>``.

    An ``I-<type>`` tag is orphaned when the tag immediately before it on the
    same line is neither ``B-<type>`` nor ``I-<type>``. Only types listed in
    *entity_types* are touched; every other token is passed through unchanged.
    """
    repaired = []
    previous = "O"  # the start of a line counts as "outside any entity"
    for label in labels:
        prefix, _, entity = label.partition("-")
        orphaned = (
            prefix == "I"
            and entity in entity_types
            and previous not in ("B-" + entity, "I-" + entity)
        )
        repaired.append("B-" + entity if orphaned else label)
        # Track the tag as read, not as repaired: a repaired "B-X" and the
        # original "I-X" both allow a following "I-X", so the result is the same.
        previous = label
    return repaired


def correct_labels(
    input_file, output_file, entity_types=("ORG", "PER", "LOC", "MISC")
):
    """Rewrite a label file so that every entity starts with a ``B-`` tag.

    Each line of *input_file* is split on single spaces. Any ``I-<type>`` token
    that does not follow a ``B-<type>`` or ``I-<type>`` token on the same line
    is replaced by ``B-<type>``. The repaired lines are written to
    *output_file*, one per row, without header or index.

    Args:
        input_file: Path (or file-like object) read with ``pandas.read_csv``
            using a tab separator and a single column named ``Text``.
        output_file: Destination passed to ``DataFrame.to_csv``.
        entity_types: Iterable of entity type names whose ``I-`` tags should be
            repaired. Defaults to the four types handled originally.
    """
    wanted = frozenset(entity_types)

    # dtype=str + keep_default_na=False keep every line as the literal text that
    # was read; otherwise pandas turns lines such as "NA", "null" or pure
    # numbers into floats/ints and ``line.split`` raises AttributeError.
    # NOTE(review): read_csv's default quote handling still applies - confirm
    # the inputs never contain '"' characters.
    df = pd.read_csv(
        input_file,
        sep="\t",
        names=["Text"],
        dtype=str,
        keep_default_na=False,
    )

    df["Text"] = [
        " ".join(_repair_labels(line.split(" "), wanted)) for line in df["Text"]
    ]
    df.to_csv(output_file, sep="\t", index=False, header=False)


# Module-level NER pipeline, built once at import time.
# NOTE(review): per the transformers pipeline documentation, device=0 selects
# the first CUDA device - confirm a GPU is available wherever this is imported.
ner_model = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
    device=0,
)