maybe better
This commit is contained in:
parent
eed10e18da
commit
d4148c58f5
40000
dev-0/out.tsv
40000
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
23126
dev-1/out.tsv
23126
dev-1/out.tsv
File diff suppressed because it is too large
Load Diff
7
run.py
7
run.py
@ -12,18 +12,19 @@ Y_train = []
|
||||
stop = 0
|
||||
|
||||
with lzma.open('train/train.tsv.xz', 'rt', encoding="utf-8") as f:
|
||||
data = pd.read_csv(f, sep='\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])
|
||||
data = pd.read_csv(f, sep='\t', names=['Begin', 'End', 'Text'])
|
||||
|
||||
|
||||
data = data[['Text', 'Begin']]
|
||||
data = data[['Text', 'End']]
|
||||
data = data[0:50000]
|
||||
|
||||
X = data['Text']
|
||||
y = data['Begin']
|
||||
y = data['End']
|
||||
|
||||
model = make_pipeline(TfidfVectorizer(), LinearRegression())
|
||||
model.fit(X, y)
|
||||
|
||||
|
||||
def readFile(filename):
|
||||
X_dev = []
|
||||
with open(filename, 'r', encoding="utf-8") as dev_in:
|
||||
|
28440
test-A/out.tsv
28440
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user