done
This commit is contained in:
parent
756ef4277a
commit
d2c6ee3e0e
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
12
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||
<option name="ignoredIdentifiers">
|
||||
<list>
|
||||
<option value="script.many_stop_words" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" filepath="$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
8
.idea/paranormal-or-skeptic-ISI-public.iml
Normal file
8
.idea/paranormal-or-skeptic-ISI-public.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
39
script.py
Normal file
39
script.py
Normal file
@ -0,0 +1,39 @@
|
||||
import numpy
|
||||
import lzma
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn import preprocessing
|
||||
|
||||
# Shared text-classification pipeline: TF-IDF features fed into a multinomial
# Naive Bayes classifier. It is a module-level object, so every call to
# get_model() refits this same instance.
PIPELINE = make_pipeline(TfidfVectorizer(), MultinomialNB())
# Shared label encoder; get_model() fits it on the training labels.
LABEL_ENC = preprocessing.LabelEncoder()
|
||||
|
||||
|
||||
def get_file(path):
    """Read a UTF-8 text file and return its lines.

    Args:
        path: Location of the plain-text file to read.

    Returns:
        A list with one string per line. Line terminators are kept, exactly
        as ``file.readlines()`` yields them.
    """
    with open(path, encoding='utf-8') as file:
        return file.readlines()
|
||||
|
||||
|
||||
def get_xz(path):
    """Read an xz/LZMA-compressed UTF-8 text file and return its lines.

    Args:
        path: Location of the ``.xz`` file to read.

    Returns:
        A list with one string per decompressed line. Line terminators are
        kept, exactly as ``file.readlines()`` yields them.
    """
    # 'rt' makes lzma decode the decompressed bytes as text.
    with lzma.open(path, 'rt', encoding='utf-8') as file:
        return file.readlines()
|
||||
|
||||
|
||||
def get_model(train_in, train_expected):
    """Fit the shared pipeline on the training data and return it.

    Args:
        train_in: Training documents, one text per sample.
        train_expected: Labels for the samples in ``train_in``.

    Returns:
        The fitted pipeline. This is the module-level ``PIPELINE`` object
        itself, not a copy.

    Side effects:
        Refits the module-level ``LABEL_ENC`` on ``train_expected`` and the
        module-level ``PIPELINE`` on the encoded labels.
    """
    # The classifier is trained on the encoder's integer class indices, so
    # its predictions are indices into LABEL_ENC.classes_.
    train_expected = LABEL_ENC.fit_transform(train_expected)
    model = PIPELINE.fit(train_in, train_expected)
    return model
|
||||
|
||||
|
||||
def do_prediction(path_a, path_b, train_expected_path):
    """Train on the training set and write predictions for one data split.

    Args:
        path_a: Directory of the split to predict. ``in.tsv.xz`` is read from
            it and ``out.tsv`` is written into it.
        path_b: Path to the xz-compressed training inputs.
        train_expected_path: Path to the plain-text training labels, one
            label per line.

    Side effects:
        Refits the shared model through ``get_model`` and creates or
        overwrites ``out.tsv`` inside ``path_a``.
    """
    # Local import keeps this block self-contained; the file header does not
    # import os.
    import os

    train_in = get_xz(path_b)
    # get_file() keeps line terminators; strip them so the encoder learns the
    # bare label values and the labels can be written back out cleanly.
    train_expected = [line.strip() for line in get_file(train_expected_path)]
    test_in = get_xz(os.path.join(path_a, 'in.tsv.xz'))

    model = get_model(train_in, train_expected)

    # The model predicts the encoder's integer class indices. Map them back
    # to the original labels so the output is correct even when the labels
    # are not already 0..n-1 (for 0/1 labels the output is unchanged).
    prediction = LABEL_ENC.inverse_transform(model.predict(test_in))
    numpy.savetxt(os.path.join(path_a, 'out.tsv'), prediction, fmt='%s')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Produce out.tsv for both evaluation splits from the same training data.
    # Each call retrains the model before predicting.
    for split_dir in ("dev-0", "test-A"):
        do_prediction(split_dir, "./train/in.tsv.xz", "./train/expected.tsv")
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user