fix directory, does not change much in score
This commit is contained in:
parent
1eb60bd963
commit
6e08c6f0af
@ -12,11 +12,10 @@ Perplexity hashed by
|
|||||||
<b>Zadania</b>
|
<b>Zadania</b>
|
||||||
-----------------
|
-----------------
|
||||||
1. Statystyczny model językowy (zadanie 5)
|
1. Statystyczny model językowy (zadanie 5)
|
||||||
- branch: master - Perplexity hashed on `dev-0`: 555.75
|
- branch: master - Perplexity hashed on `dev-0`: 549.12
|
||||||
- branch: 05_ngram - Perplexity hashed on `dev-0`: xxx
|
|
||||||
<br><br>
|
<br><br>
|
||||||
2. Neuronowy model językowy (zadanie 7)
|
2. Neuronowy model językowy (zadanie 7)
|
||||||
- branch: 07_neural - Perplexity hashed on `dev-0`: xxx
|
- branch: 07_neural - Perplexity hashed on `dev-0`: 465.53
|
||||||
<br><br>
|
<br><br>
|
||||||
3. Model neuronowy rekurencyjny (zadanie 9)
|
3. Model neuronowy rekurencyjny (zadanie 9)
|
||||||
- branch: 09_neural - Perplexity hashed on `dev-0`: xxx
|
- branch: 09_neural - Perplexity hashed on `dev-0`: xxx
|
||||||
|
9520
dev-0/out.tsv
9520
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
59
main.ipynb
59
main.ipynb
File diff suppressed because one or more lines are too long
9
run.py
9
run.py
@ -1,15 +1,14 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import csv
|
import csv
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import random
|
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
import nltk
|
import nltk
|
||||||
import math
|
import math
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
directory = "train/in.tsv.xz"
|
directory = "train/in.tsv.xz"
|
||||||
|
directory_expected = "train/expected.tsv"
|
||||||
directory_dev_0 = "dev-0/in.tsv.xz"
|
directory_dev_0 = "dev-0/in.tsv.xz"
|
||||||
directory_test_A = "test-A/in.tsv.xz"
|
directory_test_A = "test-A/in.tsv.xz"
|
||||||
|
|
||||||
@ -70,13 +69,17 @@ class Model():
|
|||||||
|
|
||||||
dataframeList = pd.read_csv(directory, sep='\t', header=None, names=['FileId', 'Year', 'LeftContext', 'RightContext'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
dataframeList = pd.read_csv(directory, sep='\t', header=None, names=['FileId', 'Year', 'LeftContext', 'RightContext'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
||||||
|
|
||||||
expectedList = pd.read_csv(directory, sep='\t', header=None, names=['Word'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
expectedList = pd.read_csv(directory_expected, sep='\t', header=None, names=['Word'], quoting=csv.QUOTE_NONE, chunksize=10000)
|
||||||
|
|
||||||
DATASET = ""
|
DATASET = ""
|
||||||
|
|
||||||
for number, (dataframe, expected) in enumerate(zip(dataframeList, expectedList)):
|
for number, (dataframe, expected) in enumerate(zip(dataframeList, expectedList)):
|
||||||
|
dataframe = dataframe.reset_index()
|
||||||
dataframe = dataframe.replace(r'\\r|\\n|\n|\\t', ' ', regex=True)
|
dataframe = dataframe.replace(r'\\r|\\n|\n|\\t', ' ', regex=True)
|
||||||
|
|
||||||
|
expected['Word'] = expected['Word'].apply(lambda x: str(x).strip())
|
||||||
|
word = expected['Word']
|
||||||
|
|
||||||
left_text = dataframe['LeftContext'].to_list()
|
left_text = dataframe['LeftContext'].to_list()
|
||||||
right_text = dataframe['RightContext'].to_list()
|
right_text = dataframe['RightContext'].to_list()
|
||||||
word = expected['Word'].to_list()
|
word = expected['Word'].to_list()
|
||||||
|
6258
test-A/out.tsv
6258
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user