Fix the path used for the expected words (read `train/expected.tsv` instead of the training input file); the score changes only slightly
This commit is contained in:
parent
1eb60bd963
commit
6e08c6f0af
|
@ -12,11 +12,10 @@ Perplexity hashed by
|
|||
<b>Zadania</b>
|
||||
-----------------
|
||||
1. Statystyczny model językowy (zadanie 5)
|
||||
- branch: master - Perplexity hashed on `dev-0`: 555.75
|
||||
- branch: 05_ngram - Perplexity hashed on `dev-0`: xxx
|
||||
- branch: master - Perplexity hashed on `dev-0`: 549.12
|
||||
<br><br>
|
||||
2. Neuronowy model językowy (zadanie 7)
|
||||
- branch: 07_neural - Perplexity hashed on `dev-0`: xxx
|
||||
- branch: 07_neural - Perplexity hashed on `dev-0`: 465.53
|
||||
<br><br>
|
||||
3. Model neuronowy rekurencyjny (zadanie 9)
|
||||
- branch: 09_neural - Perplexity hashed on `dev-0`: xxx
|
||||
|
|
9520
dev-0/out.tsv
9520
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
59
main.ipynb
59
main.ipynb
File diff suppressed because one or more lines are too long
9
run.py
9
run.py
|
@ -1,15 +1,14 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import random
|
||||
from collections import Counter, defaultdict
|
||||
import nltk
|
||||
import math
|
||||
from tqdm import tqdm
|
||||
|
||||
directory = "train/in.tsv.xz"
|
||||
directory_expected = "train/expected.tsv"
|
||||
directory_dev_0 = "dev-0/in.tsv.xz"
|
||||
directory_test_A = "test-A/in.tsv.xz"
|
||||
|
||||
|
@ -70,13 +69,17 @@ class Model():
|
|||
|
||||
dataframeList = pd.read_csv(directory, sep='\t', header=None, names=['FileId', 'Year', 'LeftContext', 'RightContext'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
||||
|
||||
expectedList = pd.read_csv(directory, sep='\t', header=None, names=['Word'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
||||
expectedList = pd.read_csv(directory_expected, sep='\t', header=None, names=['Word'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
||||
|
||||
DATASET = ""
|
||||
|
||||
for number, (dataframe, expected) in enumerate(zip(dataframeList, expectedList)):
|
||||
dataframe = dataframe.reset_index()
|
||||
dataframe = dataframe.replace(r'\\r|\\n|\n|\\t', ' ', regex=True)
|
||||
|
||||
expected['Word'] = expected['Word'].apply(lambda x: str(x).strip())
|
||||
word = expected['Word']
|
||||
|
||||
left_text = dataframe['LeftContext'].to_list()
|
||||
right_text = dataframe['RightContext'].to_list()
|
||||
word = expected['Word'].to_list()
|
||||
|
|
6258
test-A/out.tsv
6258
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue