This commit is contained in:
Ufnow 2021-05-31 00:54:42 +02:00
parent 756ef4277a
commit d2c6ee3e0e
10 changed files with 10510 additions and 0 deletions

3
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="script.many_stop_words" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" filepath="$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

39
script.py Normal file
View File

@ -0,0 +1,39 @@
import numpy
import lzma
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
# Text-classification pipeline: a TF-IDF vectorizer feeding a multinomial
# naive Bayes classifier. Shared module-level instance; get_model() calls
# fit() on it.
PIPELINE = make_pipeline(TfidfVectorizer(), MultinomialNB())
# Encoder for the training labels. Shared module-level instance; get_model()
# calls fit_transform() on it each time it runs.
LABEL_ENC = preprocessing.LabelEncoder()
def get_file(path):
    """Read the UTF-8 text file at ``path`` and return its lines as a list.

    Each returned string keeps its line terminator, exactly as the file
    object yields it.
    """
    with open(path, encoding='utf-8') as handle:
        lines = list(handle)
    return lines
def get_xz(path):
    """Read the xz-compressed UTF-8 text file at ``path`` as a list of lines.

    The file is opened in text mode through ``lzma``; each returned string
    keeps its line terminator.
    """
    with lzma.open(path, 'rt', encoding='utf-8') as handle:
        lines = list(handle)
    return lines
def get_model(train_in, train_expected):
    """Fit the shared label encoder and pipeline, and return the fitted model.

    Args:
        train_in: Training documents passed to ``PIPELINE.fit``.
        train_expected: Training labels; encoded with ``LABEL_ENC`` before
            fitting.

    Returns:
        Whatever ``PIPELINE.fit`` returns (the fitted pipeline).
    """
    encoded_targets = LABEL_ENC.fit_transform(train_expected)
    return PIPELINE.fit(train_in, encoded_targets)
def do_prediction(path_a, path_b, train_expected_path):
    """Train on the given data and write predictions for one data split.

    Args:
        path_a: Directory of the split to predict. ``in.tsv.xz`` is read from
            it and ``out.tsv`` is written into it.
        path_b: Path to the xz-compressed training input file.
        train_expected_path: Path to the plain-text training label file, one
            label per line.
    """
    train_in = get_xz(path_b)
    # get_file() keeps line terminators. Strip them so that a final line
    # without a trailing newline is not treated as a separate class
    # ("1" vs "1\n") by the label encoder.
    train_expected = [
        line.rstrip('\r\n') for line in get_file(train_expected_path)
    ]
    test_in = get_xz(path_a + '/in.tsv.xz')
    model = get_model(train_in, train_expected)
    prediction = model.predict(test_in)
    # The model predicts encoder class indices. Map them back to the original
    # label text so the output is right even when labels are not 0..k-1.
    labels = LABEL_ENC.inverse_transform(prediction)
    numpy.savetxt(path_a + "/out.tsv", labels, fmt='%s', encoding='utf-8')
if __name__ == '__main__':
    # Write out.tsv for each evaluation split, using the same training data.
    for split_dir in ("dev-0", "test-A"):
        do_prediction(split_dir, "./train/in.tsv.xz", "./train/expected.tsv")

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff