4
This commit is contained in:
parent
2be2a96fe1
commit
e49b8826cb
20822
dev-0/out.tsv
20822
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
16
run.py
16
run.py
@ -30,20 +30,8 @@ model = defaultdict(lambda: defaultdict(lambda: 0))
|
||||
|
||||
|
||||
def clean(text):
|
||||
text = str(text)
|
||||
text = (
|
||||
unicodedata.normalize("NFKD", text)
|
||||
.encode("ascii", "ignore")
|
||||
.decode("utf-8", "ignore")
|
||||
)
|
||||
text = re.sub("<.*?>", " ", text)
|
||||
text = text.translate(str.maketrans(" ", " ", string.punctuation))
|
||||
text = re.sub("[^a-zA-Z]", " ", text)
|
||||
text = re.sub("\n", " ", text)
|
||||
text = text.lower()
|
||||
text = " ".join(text.split())
|
||||
return text
|
||||
|
||||
text = str(text).lower().replace("-\\n", "").replace("\\n", " ")
|
||||
return re.sub(r"\p{P}", "", text)
|
||||
|
||||
def train_model(data):
|
||||
for _, row in data.iterrows():
|
||||
|
14574
test-A/out.tsv
14574
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user