This commit is contained in:
parent 2a9545c47d
commit 47d69670a7
dev-0/out.tsv (new file, 10403 lines; diff suppressed because it is too large)
run.py (new file, 629 lines)
@@ -0,0 +1,629 @@

# In[1]:


import os
import lzma

folders = ["./challenging-america-word-gap-prediction/dev-0",
           "./challenging-america-word-gap-prediction/test-A",
           "./challenging-america-word-gap-prediction/train"]

# Decompress every .tsv.xz shard in place, writing the plain .tsv next to it.
for folder in folders:
    for file in os.listdir(folder):
        if file.endswith(".tsv.xz"):
            file_path = os.path.join(folder, file)
            output_path = os.path.splitext(file_path)[0]  # Remove the .xz extension
            with lzma.open(file_path, "rb") as compressed_file:
                with open(output_path, "wb") as output_file:
                    output_file.write(compressed_file.read())
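
# In[ ]:


# Sketch (not part of the original pipeline): compressed_file.read() above pulls
# the whole archive into memory at once; for large shards a streamed copy keeps
# memory flat. shutil.copyfileobj is standard library.
import lzma
import shutil

def decompress_streamed(src_xz, dst_tsv, chunk_size=1 << 20):
    """Stream-decompress src_xz to dst_tsv in chunk_size-byte blocks."""
    with lzma.open(src_xz, "rb") as src, open(dst_tsv, "wb") as dst:
        shutil.copyfileobj(src, dst, chunk_size)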

# In[5]:


import nltk
nltk.download('punkt')

# In[ ]:


import pandas as pd

in_df = pd.read_csv('./challenging-america-word-gap-prediction/train/in.tsv', sep='\t', header=None, on_bad_lines='warn')
expected_df = pd.read_csv('./challenging-america-word-gap-prediction/train/expected.tsv', sep='\t', header=None, on_bad_lines='warn')

print("in_df:")
print(in_df.head())
print("\nexpected_df:")
print(expected_df.head())

print("\nin_df info:")
print(in_df.info())
print("\nexpected_df info:")
print(expected_df.info())
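
# In[ ]:


# Sketch (assumption): the dev-0 cells further down read expected.tsv with
# quoting=csv.QUOTE_NONE because the newspaper text contains stray double
# quotes that pandas would otherwise treat as quote characters. The same flag
# could be applied to the reads above; in_df_unquoted is a hypothetical name.
import csv
import pandas as pd

in_df_unquoted = pd.read_csv('./challenging-america-word-gap-prediction/train/in.tsv',
                             sep='\t', header=None, on_bad_lines='warn',
                             quoting=csv.QUOTE_NONE)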

# In[2]:


# The first six columns are metadata; keep only the two text columns.
columns_to_drop = [0, 1, 2, 3, 4, 5]
in_df.drop(columns_to_drop, axis=1, inplace=True)

in_df.columns = ['text_1', 'text_2']

# In[3]:


print(in_df.head())

# In[4]:


in_df['text_1'].fillna('', inplace=True)
in_df['text_2'].fillna('', inplace=True)

# In[5]:


from tqdm import tqdm

def replace_newline(text):
    # The corpus marks line breaks with the literal two-character sequence
    # backslash-n, so this replaces that sequence, not real newlines.
    if isinstance(text, str):
        return text.replace('\\n', ' ')
    return text

tqdm.pandas(desc="Replacing '\\n' in 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(replace_newline)

tqdm.pandas(desc="Replacing '\\n' in 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(replace_newline)
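
# In[ ]:


# Quick check (illustrative, not in the original notebook): the replacement
# targets the escaped sequence and leaves real newline characters untouched.
sample = 'first line\\nsecond line'   # literal backslash + n, not a newline
assert replace_newline(sample) == 'first line second line'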

# In[6]:


print(in_df.head())

# In[ ]:


import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import string
from tqdm import tqdm

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    text = text.lower()

    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    words = word_tokenize(text)

    # Remove stopwords and stem the words
    words = [stemmer.stem(word) for word in words if word not in stop_words]

    text = ' '.join(words)

    return text

tqdm.pandas(desc="Processing 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(preprocess_text)

tqdm.pandas(desc="Processing 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(preprocess_text)
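
# In[ ]:


# Illustration (not in the original run): what preprocess_text does to one
# sentence. Exact tokens depend on the installed NLTK data, so treat the
# output as indicative.
print(preprocess_text("The horses were running quickly down Main Street!"))
# expected along the lines of: 'hors run quickli main street'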

# In[8]:


in_df.to_csv('preprocessed_text1_text2.tsv', sep='\t', index=False)

# In[9]:


def concatenate_texts(row):
    return str(row['text_1']) + ' <MASK> ' + str(row['text_2'])

tqdm.pandas(desc="Concatenating 'text_1' and 'text_2'")
in_df['text'] = in_df.progress_apply(concatenate_texts, axis=1)

in_df.drop(['text_1', 'text_2'], axis=1, inplace=True)
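
# In[ ]:


# Toy example (illustrative): a left/right context pair becomes one string
# with the gap marked by <MASK>.
toy_row = {'text_1': 'presid said', 'text_2': 'would veto bill'}
print(concatenate_texts(toy_row))
# -> 'presid said <MASK> would veto bill'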

# In[31]:


from tqdm import tqdm
tqdm.pandas()

in_df = pd.read_csv('preprocessed_text1_text2.tsv', sep='\t')

expected_df = pd.read_csv('./challenging-america-word-gap-prediction/train/expected.tsv', sep='\t', header=None, on_bad_lines='warn')
expected_df.columns = ['expected_word']

in_df = pd.concat([in_df, expected_df], axis=1)

def concatenate_texts(row):
    return str(row['text_1']) + ' ' + str(row['expected_word']) + ' ' + str(row['text_2'])

# Apply the function to each row and show progress
in_df['unmasked_text'] = in_df.progress_apply(concatenate_texts, axis=1)

# In[35]:


in_df['unmasked_text'].to_csv('preprocessed_text_join_unmask.tsv', sep='\t', index=False)

# In[10]:


in_df.to_csv('preprocessed_text_join_mask.tsv', sep='\t', index=False)

# In[5]:


import pandas as pd

in_df = pd.read_csv('preprocessed_text_join_mask.tsv', sep='\t')

print(in_df.head())

# In[6]:


print(in_df.head())

# In[1]:


from abc import ABC, abstractmethod
from collections import Counter
from nltk.util import ngrams
from nltk.tokenize import word_tokenize
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

class Model(ABC):

    def __init__(self, UNK_token='<MASK>', smoothing_parameter=0.5):
        self.UNK_token = UNK_token
        self.smoothing_parameter = smoothing_parameter

    @abstractmethod
    def train(self, corpus):
        pass

    @abstractmethod
    def predict(self, text):
        pass

class InterpolatedModel(Model):

    def train(self, corpus, smoothing_parameter=0.5, vocab=15000):

        self.smoothing_parameter = smoothing_parameter
        if corpus.empty:
            raise ValueError("The corpus is empty.")

        # Deduplicate sentences before counting.
        corpus = list(set(corpus))

        tokens = []
        for i, sentence in enumerate(corpus):
            sentence_tokens = word_tokenize(sentence)
            # Add padding to sentences that are less than three words long
            while len(sentence_tokens) < 3:
                sentence_tokens.append(self.UNK_token)
            tokens.append(sentence_tokens)
            if i % 1000 == 0:
                print(f'Tokenizing sentence {i} of {len(corpus)}')

        self.unigram_counts = Counter()
        self.bigram_counts = Counter()
        self.trigram_counts = Counter()
        for i, sentence in enumerate(tokens):
            self.unigram_counts.update(sentence)
            self.bigram_counts.update(ngrams(sentence, 2, pad_left=True, pad_right=True))
            self.trigram_counts.update(ngrams(sentence, 3, pad_left=True, pad_right=True))
            if i % 1000 == 0:
                print(f'Counting ngrams in sentence {i} of {len(tokens)}')

        # Keep only the `vocab` most frequent unigrams.
        self.unigram_counts = Counter(dict(self.unigram_counts.most_common(vocab)))

        self.vocab_size = len(self.unigram_counts)
        # Total token mass of the retained vocabulary; unigram probabilities
        # are counts divided by this total, not by the vocabulary size.
        self.total_unigrams = sum(self.unigram_counts.values())

    def _interpolated_probability(self, previous_bigram, word):
        # Linear interpolation of unigram, bigram and trigram estimates with
        # weights (lam, lam, 1 - 2*lam), which sum to one so the mixture
        # remains a probability.
        lam = self.smoothing_parameter
        unigram_prob = self.unigram_counts.get(word, 0) / self.total_unigrams
        bigram_prob = self.bigram_counts.get((previous_bigram[1], word), 0) / self.unigram_counts.get(previous_bigram[1], 1)
        trigram_prob = self.trigram_counts.get((*previous_bigram, word), 0) / self.bigram_counts.get(previous_bigram, 1)
        return lam * unigram_prob + lam * bigram_prob + (1 - 2 * lam) * trigram_prob

    def probability(self, text, word):
        tokens = word_tokenize(text)
        if len(tokens) < 2:
            # Use a unigram model if the input text is only one word
            return self.unigram_counts.get(word, 0) / self.total_unigrams
        else:
            # Use a bigram/trigram model if the input text is at least two words
            previous_bigram = tuple(tokens[-2:])
            return self._interpolated_probability(previous_bigram, word)

    def perplexity(self, sentence):
        tokens = word_tokenize(sentence)

        probabilities = [self.probability(' '.join(tokens[:i]), tokens[i]) for i in range(1, len(tokens))]

        log_probabilities = [np.log2(p) if p > 0 else float('-inf') for p in probabilities]

        average_log_probability = np.mean(log_probabilities)

        # Perplexity is two raised to the negative mean log2-probability.
        return np.power(2, -average_log_probability)

    def predict(self, text):
        tokens = word_tokenize(text)
        if len(tokens) < 1:
            return None
        elif len(tokens) < 2:
            # Use a unigram model if the input text is only one word
            probabilities = {word: count / self.total_unigrams for word, count in self.unigram_counts.items()}
        else:
            # Use a bigram/trigram model if the input text is at least two words
            previous_bigram = tuple(tokens[-2:])
            probabilities = {word: self._interpolated_probability(previous_bigram, word)
                             for word in self.unigram_counts}
        return max(probabilities, key=probabilities.get)
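
# In[ ]:


# Minimal usage sketch on toy data (not the real corpus): train on a pandas
# Series of sentences, then predict the word that follows a two-word context.
toy_corpus = pd.Series([
    'the cat sat on the mat',
    'the cat ate the fish',
    'the dog sat on the rug',
])
toy_model = InterpolatedModel()
toy_model.train(toy_corpus, smoothing_parameter=0.4, vocab=100)
print(toy_model.predict('the cat'))                    # some word seen after 'the cat'
print(toy_model.perplexity('the cat sat on the mat'))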

# In[32]:


in_df = pd.read_csv('preprocessed_text_join_unmask.tsv', sep='\t')
expected_df = pd.read_csv('./challenging-america-word-gap-prediction/train/expected.tsv', sep='\t', header=None, on_bad_lines='warn')

df = pd.concat([in_df, expected_df], axis=1)
df.columns = ['text', 'expected_word']

# Train on the first quarter of the data to keep memory and run time manageable.
quarter = len(df) // 4
df = df.iloc[:quarter]

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

model = InterpolatedModel()
model.train(train_df['text'], smoothing_parameter=0.4, vocab=60000)
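
# In[ ]:


# Sketch (assumption): test_df is split off above but never evaluated; this
# estimates held-out accuracy by hiding the expected word and predicting it
# from the two preceding words. context_before_word is a hypothetical helper;
# it uses the first occurrence of the word, which can be wrong if the word
# also appears earlier in the sentence.
def context_before_word(text, word):
    words = str(text).split()
    if word in words:
        i = words.index(word)
        return ' '.join(words[max(0, i - 2):i])
    return None

sample = test_df.head(200)
hits = 0
for _, r in sample.iterrows():
    ctx = context_before_word(r['text'], str(r['expected_word']))
    if ctx and model.predict(ctx) == r['expected_word']:
        hits += 1
print(f'held-out accuracy on {len(sample)} rows: {hits / len(sample):.3f}')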

# In[33]:


import pickle

with open('trained_model_quarter_sp04_vocab60k.pkl', 'wb') as file:
    pickle.dump(model, file)

# In[ ]:


import pickle

with open('trained_model_quarter_sp04_vocab60k.pkl', 'rb') as file:
    model = pickle.load(file)


# Smoothing = 0.4

# In[14]:


import pandas as pd

df = pd.read_csv('preprocessed_text_join_mask.tsv', sep='\t')

expected_df = pd.read_csv('./challenging-america-word-gap-prediction/train/expected.tsv', sep='\t', header=None)
expected_df.columns = ['expected_word']

# Spot-check on the first 100 training rows: take the two words before <MASK>
# as context and print the cases the model gets right.
for index, row in df.head(100).iterrows():
    sentence = row['text']
    expected_word = expected_df.loc[index, 'expected_word']
    words = sentence.split()
    if '<MASK>' in words:
        mask_index = words.index('<MASK>')
        start_index = max(0, mask_index - 2)
        context = ' '.join(words[start_index:mask_index])
        predicted_word = model.predict(context)
        if predicted_word == expected_word:
            print(f'Index: {index} Predicted Word: {predicted_word}, Expected Word: {expected_word}')
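
# In[ ]:


# Sketch (not in the original notebook): the loop above only prints hits; a
# counter over the same pass turns it into an accuracy number.
n = 0
hits = 0
for index, row in df.head(100).iterrows():
    words = str(row['text']).split()
    if '<MASK>' not in words:
        continue
    mask_index = words.index('<MASK>')
    context = ' '.join(words[max(0, mask_index - 2):mask_index])
    n += 1
    if model.predict(context) == expected_df.loc[index, 'expected_word']:
        hits += 1
print(f'accuracy on first {n} masked rows: {hits / max(n, 1):.3f}')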

# In[21]:


import pandas as pd
import csv

# Load the data
in_df = pd.read_csv('./challenging-america-word-gap-prediction/dev-0/in.tsv', sep='\t', header=None, on_bad_lines='warn')
expected_df = pd.read_csv('./challenging-america-word-gap-prediction/dev-0/expected.tsv', sep='\t', header=None, on_bad_lines='warn', quoting=csv.QUOTE_NONE)

columns_to_drop = [0, 1, 2, 3, 4, 5]
in_df.drop(columns_to_drop, axis=1, inplace=True)

in_df.columns = ['text_1', 'text_2']
# Fill both text columns: preprocess_text below assumes strings, and a NaN in
# either column would raise.
in_df['text_1'].fillna('', inplace=True)
in_df['text_2'].fillna('', inplace=True)

# In[22]:


from tqdm import tqdm

def replace_newline(text):
    if isinstance(text, str):
        return text.replace('\\n', ' ')
    return text

tqdm.pandas(desc="Replacing '\\n' in 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(replace_newline)

tqdm.pandas(desc="Replacing '\\n' in 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(replace_newline)

# In[23]:


import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import string
from tqdm import tqdm

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = word_tokenize(text)
    words = [stemmer.stem(word) for word in words if word not in stop_words]
    text = ' '.join(words)
    return text

tqdm.pandas(desc="Processing 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(preprocess_text)

tqdm.pandas(desc="Processing 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(preprocess_text)

# In[24]:


in_df.to_csv('preprocessed_dev_text1_text2.tsv', sep='\t', index=False)

# In[25]:


def concatenate_texts(row):
    return str(row['text_1']) + ' <MASK> ' + str(row['text_2'])

tqdm.pandas(desc="Concatenating 'text_1' and 'text_2'")
in_df['text'] = in_df.progress_apply(concatenate_texts, axis=1)

in_df.drop(['text_1', 'text_2'], axis=1, inplace=True)

# In[26]:


in_df.to_csv('preprocessed_dev_text_join_mask.tsv', sep='\t', index=False)

# In[34]:


import csv

df = pd.read_csv('preprocessed_dev_text_join_mask.tsv', sep='\t')

expected_df = pd.read_csv('./challenging-america-word-gap-prediction/dev-0/expected.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)
expected_df.columns = ['expected_word']

for index, row in df.head(100).iterrows():
    sentence = row['text']
    expected_word = expected_df.loc[index, 'expected_word']
    words = sentence.split()
    if '<MASK>' in words:
        mask_index = words.index('<MASK>')
        start_index = max(0, mask_index - 2)
        context = ' '.join(words[start_index:mask_index])
        predicted_word = model.predict(context)
        if predicted_word == expected_word:
            print(f'Index: {index} Predicted Word: {predicted_word}, Expected Word: {expected_word}')

# In[36]:


import csv

df = pd.read_csv('preprocessed_dev_text_join_mask.tsv', sep='\t')

expected_df = pd.read_csv('./challenging-america-word-gap-prediction/dev-0/expected.tsv', sep='\t', header=None, quoting=csv.QUOTE_NONE)
expected_df.columns = ['expected_word']

df['Word'] = None

for index, row in df.iterrows():
    sentence = row['text']
    words = sentence.split()
    if '<MASK>' in words:
        expected_word = expected_df.loc[index, 'expected_word']
        mask_index = words.index('<MASK>')
        start_index = max(0, mask_index - 2)
        context = ' '.join(words[start_index:mask_index])
        predicted_word = model.predict(context)
        df.loc[index, 'Word'] = predicted_word
        if predicted_word == expected_word:
            print(f'Index: {index} Predicted Word: {predicted_word}, Expected Word: {expected_word}')

# header=False keeps out.tsv aligned line-for-line with expected.tsv.
df['Word'].to_csv('./challenging-america-word-gap-prediction/dev-0/out.tsv', sep='\t', index=False, header=False)
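
# In[ ]:


# Sanity check (sketch, not in the original notebook): the grader matches
# out.tsv to expected.tsv line by line, so the two files must have the same
# number of lines.
def count_lines(path):
    with open(path, encoding='utf-8') as f:
        return sum(1 for _ in f)

base = './challenging-america-word-gap-prediction/dev-0'
assert count_lines(f'{base}/out.tsv') == count_lines(f'{base}/expected.tsv')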

# In[37]:


import pandas as pd

in_df = pd.read_csv('./challenging-america-word-gap-prediction/test-A/in.tsv', sep='\t', header=None, on_bad_lines='warn')

columns_to_drop = [0, 1, 2, 3, 4, 5]
in_df.drop(columns_to_drop, axis=1, inplace=True)

in_df.columns = ['text_1', 'text_2']
# As with dev-0, fill both text columns so preprocess_text never sees NaN.
in_df['text_1'].fillna('', inplace=True)
in_df['text_2'].fillna('', inplace=True)

# In[38]:


from tqdm import tqdm

def replace_newline(text):
    if isinstance(text, str):
        return text.replace('\\n', ' ')
    return text

tqdm.pandas(desc="Replacing '\\n' in 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(replace_newline)

tqdm.pandas(desc="Replacing '\\n' in 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(replace_newline)

# In[39]:


import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import string
from tqdm import tqdm

nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = word_tokenize(text)
    words = [stemmer.stem(word) for word in words if word not in stop_words]
    text = ' '.join(words)
    return text

tqdm.pandas(desc="Processing 'text_1'")
in_df['text_1'] = in_df['text_1'].progress_apply(preprocess_text)

tqdm.pandas(desc="Processing 'text_2'")
in_df['text_2'] = in_df['text_2'].progress_apply(preprocess_text)

# In[40]:


in_df.to_csv('preprocessed_test_text1_text2.tsv', sep='\t', index=False)

# In[41]:


def concatenate_texts(row):
    return str(row['text_1']) + ' <MASK> ' + str(row['text_2'])

tqdm.pandas(desc="Concatenating 'text_1' and 'text_2'")
in_df['text'] = in_df.progress_apply(concatenate_texts, axis=1)

in_df.drop(['text_1', 'text_2'], axis=1, inplace=True)

# In[42]:


in_df.to_csv('preprocessed_test_text_join_mask.tsv', sep='\t', index=False)

# In[43]:


# Predict on the test split; the input must be the *test* preprocessed file
# (the masked dev file is a different dataset).
df = pd.read_csv('preprocessed_test_text_join_mask.tsv', sep='\t')

df['Word'] = None

for index, row in df.iterrows():
    sentence = row['text']
    words = sentence.split()
    if '<MASK>' in words:
        mask_index = words.index('<MASK>')
        start_index = max(0, mask_index - 2)
        context = ' '.join(words[start_index:mask_index])
        predicted_word = model.predict(context)
        df.loc[index, 'Word'] = predicted_word

# header=False keeps out.tsv aligned line-for-line with in.tsv for the grader.
df['Word'].to_csv('./challenging-america-word-gap-prediction/test-A/out.tsv', sep='\t', index=False, header=False)

test-A/out.tsv (new file, 10403 lines; diff suppressed because it is too large)