This commit is contained in:
Jakub 2022-06-07 22:19:51 +02:00
parent 33b70ce7b1
commit f1320eab5f
8 changed files with 292322 additions and 0 deletions

20000
dev-0/meta.tsv Normal file

File diff suppressed because it is too large Load Diff

19998
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

11563
dev-1/meta.tsv Normal file

File diff suppressed because it is too large Load Diff

11562
dev-1/out.tsv Normal file

File diff suppressed because it is too large Load Diff

38
run.py Normal file
View File

@ -0,0 +1,38 @@
import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
def load_data(path):
    """Read a headerless, tab-separated file into a DataFrame.

    Columns are labelled with their integer positions (0, 1, ...) because
    the file has no header row.

    Quoting is disabled on purpose: the fields are free text, and with
    pandas' default quote handling a field that begins with a double quote
    swallows the following tabs and newlines up to the next quote, silently
    merging several input lines into a single row.

    Args:
        path: Path of the TSV file to read.

    Returns:
        A pandas DataFrame with one row per non-blank input line.
    """
    return pd.read_csv(path, sep='\t', header=None, quoting=csv.QUOTE_NONE)
def write_res(data, path):
    """Write each item of ``data`` to ``path``, one item per line.

    Args:
        data: Iterable of values; each one is rendered with f-string
            formatting and followed by a newline.
        path: Full path of the output file; it is created or overwritten.
    """
    # Pin the encoding so the output does not depend on the platform locale.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(f'{line}\n' for line in data)
    # `path` already names the output file, so report it as-is; the previous
    # message appended a second "/out.tsv" to it.
    print(f"Data written {path}")
def main():
    """Train a TF-IDF + linear-regression pipeline and write predictions.

    Reads ``train/train.tsv`` and fits the pipeline on column 4 (bound to
    ``text``) against the mean of columns 0 and 1 (bound to ``year``). Then,
    for each of ``dev-0``, ``dev-1`` and ``test-A``, predicts from column 0
    of ``<dir>/in.tsv`` and writes the predictions to ``<dir>/out.tsv``.
    """
    train = load_data('train/train.tsv')
    text = train[4]
    # Target is the midpoint of columns 0 and 1. Plain column arithmetic
    # yields the same values as a row-wise apply, without a Python-level
    # call per row.
    year = (train[0] + train[1]) / 2
    model = make_pipeline(TfidfVectorizer(), LinearRegression())
    model.fit(text, year)
    for path in ['dev-0', 'dev-1', 'test-A']:
        in_df = load_data(f'{path}/in.tsv')
        predict = model.predict(in_df[0])
        write_res(predict, f'{path}/out.tsv')
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

14219
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

107471
train/meta.tsv Normal file

File diff suppressed because it is too large Load Diff

107471
train/train.tsv Normal file

File diff suppressed because one or more lines are too long