Fixed problem with code
This commit is contained in:
parent
3d9f5c9950
commit
7b1a3da3a6
8
code.py
8
code.py
@ -19,10 +19,10 @@ def create_train(input,output1,output0):
|
||||
def tokenize(input,output):
|
||||
with open(input,'rt',encoding="utf8") as input_f, open(output,'w',encoding="utf-8") as file:
|
||||
for line in input_f:
|
||||
text = line.lower()
|
||||
text = re.sub(r'\\n+', " ", text)
|
||||
text = re.sub(r'http\S+', " ", text)
|
||||
text = re.sub(r'\/[a-z]\/', " ", text)
|
||||
#text = line.lower()
|
||||
#text = re.sub(r'\\n+', " ", text)
|
||||
text = re.sub(r'http\S+', " ", line)
|
||||
#text = re.sub(r'\/[a-z]\/', " ", text)
|
||||
#text = re.sub(r'\s{2,}', " ", text)
|
||||
#text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||
#text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||
|
8660
dev-0/out.tsv
8660
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
8014
test-A/out.tsv
8014
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user