Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
d2c6ee3e0e |
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredIdentifiers">
|
||||||
|
<list>
|
||||||
|
<option value="script.many_stop_words" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" filepath="$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
8
.idea/paranormal-or-skeptic-ISI-public.iml
Normal file
8
.idea/paranormal-or-skeptic-ISI-public.iml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
39
script.py
Normal file
39
script.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import numpy
|
||||||
|
import lzma
|
||||||
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.pipeline import make_pipeline
|
||||||
|
from sklearn import preprocessing
|
||||||
|
|
||||||
|
# Shared classification pipeline: TF-IDF text features fed into a multinomial naive Bayes classifier.
PIPELINE = make_pipeline(TfidfVectorizer(), MultinomialNB())
|
||||||
|
# Shared label encoder; get_model() fits it on the training labels before the pipeline is trained.
LABEL_ENC = preprocessing.LabelEncoder()
|
||||||
|
|
||||||
|
|
||||||
|
def get_file(path):
    """Return every line of the UTF-8 text file at *path* as a list.

    Line terminators are kept on each returned string.
    """
    with open(path, encoding='utf-8') as handle:
        # Iterating a text file yields the same lines readlines() would.
        return list(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def get_xz(path):
    """Return every line of the xz-compressed UTF-8 text file at *path*.

    The archive is opened in text mode, so the result is a list of
    decoded strings with their line terminators kept.
    """
    with lzma.open(path, 'rt', encoding='utf-8') as handle:
        # Iterating the decompressed text stream yields the same lines readlines() would.
        return list(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def get_model(train_in, train_expected):
    """Fit the shared pipeline on the training data and return it.

    The raw labels in *train_expected* are first encoded with the
    module-level LABEL_ENC; the module-level PIPELINE is then fitted on
    *train_in* against the encoded labels.
    """
    encoded_labels = LABEL_ENC.fit_transform(train_expected)
    return PIPELINE.fit(train_in, encoded_labels)
|
||||||
|
|
||||||
|
|
||||||
|
def do_prediction(path_a, path_b, train_expected_path):
    """Train on the given training files and write predictions for *path_a*.

    path_a: directory holding the 'in.tsv.xz' file to classify; the
        predictions are written to 'out.tsv' in the same directory.
    path_b: path to the xz-compressed training input.
    train_expected_path: path to the plain-text training labels.
    """
    training_docs = get_xz(path_b)
    training_labels = get_file(train_expected_path)

    input_path = path_a + '/in.tsv.xz'
    docs_to_classify = get_xz(input_path)

    classifier = get_model(training_docs, training_labels)
    predicted = classifier.predict(docs_to_classify)

    # One integer-formatted prediction per line.
    output_path = path_a + "/out.tsv"
    numpy.savetxt(output_path, predicted, fmt='%d')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Produce predictions for each evaluation directory in turn, using the same training files.
    for target_dir in ("dev-0", "test-A"):
        do_prediction(target_dir, "./train/in.tsv.xz", "./train/expected.tsv")
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user