Add run.py file
This commit is contained in:
parent
e2976f8d89
commit
86e23f9621
35
run.py
Normal file
35
run.py
Normal file
@ -0,0 +1,35 @@
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
|
||||
# Train a TF-IDF + linear-regression model on train.tsv and write one
# prediction per input line for the dev0, dev1 and testA splits.

SPLITS = ("dev0", "dev1", "testA")


def read_documents(path):
    """Return the lines of *path* as a list of documents, one per line.

    The file is read as plain text instead of through ``pd.read_csv`` so
    that no line is ever dropped or altered by CSV parsing: blank lines
    stay in place as empty documents and tokens such as ``NA`` are kept as
    text. That guarantees exactly one document (and so one prediction) per
    input line. Tabs inside a line are kept; the whole line is the document.
    """
    with open(path, encoding="UTF-8") as file:
        return [line.rstrip("\n") for line in file]


def write_predictions(path, predictions):
    """Write each value in *predictions* to *path* on its own line."""
    with open(path, "w", encoding="UTF-8") as file:
        file.writelines(str(value) + "\n" for value in predictions)


# Training data: tab-separated, no header row. Columns 0 and 1 are averaged
# into the regression target; column 4 holds the text used as model input.
# NOTE(review): columns 0/1 look like a start/end year pair - confirm.
df = pd.read_csv("train.tsv", sep="\t", header=None)
df["year_mean"] = (df[1] + df[0]) / 2

# Load every input file before training so a missing file fails fast.
documents_by_split = {
    split: read_documents(f"{split}_in.tsv") for split in SPLITS
}

# Unigram + bigram TF-IDF features, vocabulary capped at 2000 terms.
vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
X = vectorizer.fit_transform(df[4])
y = df["year_mean"]

model = LinearRegression().fit(X, y)

for split, documents in documents_by_split.items():
    # Reuse the vocabulary fitted on the training text (transform, not fit).
    predictions = model.predict(vectorizer.transform(documents))
    write_predictions(f"{split}_out.tsv", predictions)
|
||||
|
Loading…
Reference in New Issue
Block a user