v1.0
This commit is contained in:
parent
33b70ce7b1
commit
f1320eab5f
20000
dev-0/meta.tsv
Normal file
20000
dev-0/meta.tsv
Normal file
File diff suppressed because it is too large
Load Diff
19998
dev-0/out.tsv
Normal file
19998
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
11563
dev-1/meta.tsv
Normal file
11563
dev-1/meta.tsv
Normal file
File diff suppressed because it is too large
Load Diff
11562
dev-1/out.tsv
Normal file
11562
dev-1/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
38
run.py
Normal file
38
run.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(path):
    """Read a headerless, tab-separated file into a DataFrame.

    Args:
        path: Location of the TSV file to read.

    Returns:
        A ``pandas.DataFrame`` whose columns are labelled by integer
        position (0, 1, ...), because the file has no header row.
    """
    # Treat quote characters as ordinary text. With the default quoting, a
    # field that opens with an unbalanced '"' swallows the following line
    # breaks, so several input lines collapse into one row and the number
    # of predictions no longer matches the number of input lines.
    return pd.read_csv(path, sep='\t', header=None, quoting=csv.QUOTE_NONE)
|
||||||
|
|
||||||
|
|
||||||
|
def write_res(data, path):
    """Write each item of ``data`` on its own line of the file at ``path``.

    Args:
        data: Iterable of values; each one is formatted with an f-string
            and followed by a newline.
        path: Full path of the output file; it is created or overwritten.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(f'{line}\n' for line in data)
    # ``path`` already names the output file, so it is reported as-is
    # instead of appending a second "/out.tsv" to it.
    print(f"Data written {path}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Fit a TF-IDF + linear-regression pipeline and write predictions.

    Reads ``train/train.tsv``, uses column 4 as the input text and the
    midpoint of columns 0 and 1 as the regression target (named ``year``
    here). For each of ``dev-0``, ``dev-1`` and ``test-A`` it then predicts
    from column 0 of ``<dir>/in.tsv`` and writes one prediction per line to
    ``<dir>/out.tsv``.
    """
    train = load_data('train/train.tsv')
    # A missing cell is loaded as NaN, which TfidfVectorizer rejects; an
    # empty document keeps the row (and the line alignment) instead.
    text = train[4].fillna('')
    # Column arithmetic gives the same midpoint as a row-wise apply without
    # invoking a Python lambda once per training row.
    year = (train[0] + train[1]) / 2

    model = make_pipeline(TfidfVectorizer(), LinearRegression())
    model.fit(text, year)

    for path in ['dev-0', 'dev-1', 'test-A']:
        in_df = load_data(f'{path}/in.tsv')
        predict = model.predict(in_df[0].fillna(''))
        write_res(predict, f'{path}/out.tsv')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the training/prediction pipeline only when this file is executed
    # as a script, not when it is imported.
    main()
|
14219
test-A/out.tsv
Normal file
14219
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
107471
train/meta.tsv
Normal file
107471
train/meta.tsv
Normal file
File diff suppressed because it is too large
Load Diff
107471
train/train.tsv
Normal file
107471
train/train.tsv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user