This commit is contained in:
Maciej Czajka 2022-05-09 20:43:34 +02:00
parent ecfafbf86c
commit 14137acd6d
13 changed files with 310529 additions and 0 deletions

3
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@ -0,0 +1,18 @@
<!-- Inspection profile stored with the project; its name is "Project Default". -->
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<!-- Unresolved-reference inspection: enabled at WARNING level, with the identifiers listed below excluded from it. -->
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="main.*" />
<option value="nltk.translate.bleu_score.corpus_bleu" />
</list>
</option>
</inspection_tool>
<!-- Spell-checking inspection: disabled (enabled="false"); the options below record its code / literal / comment scope flags. -->
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<!-- Settings for the inspection profile manager: USE_PROJECT_PROFILE is set to false. -->
<!-- NOTE(review): presumably this makes the IDE use its own profile rather than a project-stored one; confirm against the IDE documentation. -->
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level SDK selection: SDK name "Python 3.8 (base)", SDK type "Python SDK". -->
<!-- NOTE(review): the SDK name presumably refers to an interpreter configured on the author's machine; verify it exists locally. -->
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (base)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module registry for the project: lists a single module file, .idea/paranormal-or-skeptic-ISI-public.iml. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" filepath="$PROJECT_DIR$/.idea/paranormal-or-skeptic-ISI-public.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module definition of type PYTHON_MODULE. -->
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<!-- The module's content root is the module directory itself. -->
<content url="file://$MODULE_DIR$" />
<!-- NOTE(review): "inheritedJdk" presumably means the module uses the project-level SDK; confirm. -->
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version-control mapping: the project root directory is mapped to Git. -->
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

5272
dev-0/in.tsv Normal file

File diff suppressed because one or more lines are too long

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

49
run.py Normal file
View File

@ -0,0 +1,49 @@
from naivebayes import NaiveBayesTextClassifier
from spacy.lang.en.stop_words import STOP_WORDS as en_stop
# Script: train a naive Bayes text classifier on train/, then write one
# prediction per input line for the dev-0 and test-A splits.

# Classifier over the two categories 0 and 1, using the imported English
# stop-word set.
naive_bayes = NaiveBayesTextClassifier(
    categories=[0, 1],
    stop_words=en_stop
)

# Training texts: one document per line (trailing newlines are kept).
with open('train/in.tsv', 'r', encoding='utf8') as f:
    train = f.readlines()

# Training labels: one integer per line, aligned by position with `train`.
with open('train/expected.tsv', 'r', encoding='utf8') as f:
    expected = [int(line) for line in f]

# Feed the classifier in fixed-size batches instead of one huge call.
# Each batch is clamped to len(expected) so the text slice and the label
# slice always cover the same index range, including the very first batch.
step = 15000
for start in range(0, len(expected), step):
    end = min(start + step, len(expected))
    naive_bayes.train(train[start:end], expected[start:end])

# Predict the dev-0 split and write one prediction per line.
with open('dev-0/in.tsv', 'r', encoding='utf8') as f:
    dev_0 = f.readlines()
predicted_dev_0 = naive_bayes.classify(dev_0)
# The `with` block closes the file; no explicit close() is needed.
with open('dev-0/out.tsv', 'wt', encoding='utf8') as f:
    for p in predicted_dev_0:
        f.write(str(p) + '\n')

# Predict the test-A split and write one prediction per line.
with open('test-A/in.tsv', 'r', encoding='utf8') as f:
    test_A = f.readlines()
predicted_test_A = naive_bayes.classify(test_A)
with open('test-A/out.tsv', 'wt', encoding='utf8') as f:
    for p in predicted_test_A:
        f.write(str(p) + '\n')

5152
test-A/in.tsv Normal file

File diff suppressed because one or more lines are too long

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff

289579
train/in.tsv Normal file

File diff suppressed because one or more lines are too long