Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
12b6ee8fc6 | ||
![]() |
8d1e133c8e | ||
![]() |
c05e1e4df7 | ||
![]() |
f9b346e3fb | ||
![]() |
24dd877b29 |
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
|
||||
in.tsv
|
||||
model.pkl
|
||||
*~
|
||||
*.swp
|
||||
*.bak
|
||||
@ -6,3 +7,4 @@
|
||||
*.o
|
||||
.DS_Store
|
||||
.token
|
||||
.idea
|
2
.idea/.gitignore
vendored
Normal file
2
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
# Default ignored files
|
||||
/workspace.xml
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/paranormal-or-skeptic.iml" filepath="$PROJECT_DIR$/.idea/paranormal-or-skeptic.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
11
.idea/paranormal-or-skeptic.iml
Normal file
11
.idea/paranormal-or-skeptic.iml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||
</component>
|
||||
</module>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
10544
dev-0/out.tsv
10544
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
1
info.txt
Normal file
1
info.txt
Normal file
@ -0,0 +1 @@
|
||||
Use Naive Bayes implemented by some ready-made toolkit (e.g. sci-kit learn)
|
15
scores.txt
15
scores.txt
@ -1,15 +0,0 @@
|
||||
0.6920
|
||||
0.6857
|
||||
0.6969
|
||||
0.6931
|
||||
0.6927
|
||||
0.6952
|
||||
0.6969
|
||||
0.6969
|
||||
0.6959
|
||||
0.6959
|
||||
0.6965
|
||||
0.6965
|
||||
0.6965
|
||||
0.6954
|
||||
0.6965
|
46
solution.py
46
solution.py
@ -1,20 +1,42 @@
|
||||
import re
|
||||
import sys
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import csv
|
||||
from sklearn.naive_bayes import MultinomialNB
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
count_vect = CountVectorizer()
|
||||
|
||||
for line in sys.stdin:
|
||||
if re.search(r'UFO|paranormal|UFOs|video|night|house|saw|camera|lights|light|alien|aliens|ghost|object|dream|sky|room|ufo|craft|happened|sightings|footage|dreams|sleep', line):
|
||||
print("P")
|
||||
else:
|
||||
print("S")
|
||||
#load data:
|
||||
train = pd.read_csv("train/in.tsv", delimiter="\t", header=None, names=["text","date"], quoting=csv.QUOTE_NONE)
|
||||
texts = train["text"]
|
||||
y = pd.read_csv("train/expected.tsv", header=None)
|
||||
|
||||
#print(y)
|
||||
#train
|
||||
X_train_counts = count_vect.fit_transform(texts)
|
||||
clf = MultinomialNB().fit(X_train_counts, y)
|
||||
print(texts[0])
|
||||
print(len(texts))
|
||||
print(len(y))
|
||||
|
||||
#predict
|
||||
dev0 = pd.read_csv("dev-0/in.tsv", delimiter="\t", header=None, names=["text","date"], quoting=csv.QUOTE_NONE)["text"]
|
||||
testA = pd.read_csv("test-A/in.tsv", delimiter="\t", header=None, names=["text","date"], quoting=csv.QUOTE_NONE)["text"]
|
||||
|
||||
dev0_new_counts = count_vect.transform(dev0)
|
||||
testA_new_counts = count_vect.transform(testA)
|
||||
|
||||
predicted_dev0 = clf.predict(dev0_new_counts)
|
||||
predicted_testA = clf.predict(testA_new_counts)
|
||||
|
||||
print(len(dev0))
|
||||
print(len(predicted_dev0))
|
||||
|
||||
"""
|
||||
with open("dev-0/out.tsv", "w") as out1:
|
||||
for line in predicted_dev0:
|
||||
out1.write(line)
|
||||
out1.write("\n")
|
||||
|
||||
|
||||
happened|sightings|footage|dreams|sleep|videos|experiences|weird|objects|flying|strange|ET|photo|moving|fake|sighting|door|ghosts|looks|bed|spirits|paralysis|pictures|glitch|shadow|picture|space|photos|looked|phenomena|contact|spirit|stories|phenomenon|window|ufos|haunted|lol|creepy|lanterns|dark|scared|cameras|balloon|seen|beings|disclosure|story
|
||||
|
||||
"""
|
||||
with open("test-A/out.tsv", "w") as out2:
|
||||
for line in predicted_testA:
|
||||
out2.write(line)
|
||||
out2.write("\n")
|
||||
|
4
start.sh
4
start.sh
@ -1,4 +0,0 @@
|
||||
xzcat dev-0/in.tsv.xz | python3 solution.py > dev-0/out.tsv
|
||||
|
||||
xzcat test-A/in.tsv.xz | python3 solution.py > test-A/out.tsv
|
||||
geval -t dev-0 >>scores.txt
|
10304
test-A/out.tsv
10304
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -1,4 +0,0 @@
|
||||
xzcat in.tsv.xz | paste expected.tsv - | egrep -o '^ P.*'| egrep -o '[[:alpha:]]+' | sort > sortedP
|
||||
xzcat in.tsv.xz | paste expected.tsv - | egrep -o '^ S.*'| egrep -o '[[:alpha:]]+' | sort > sortedS
|
||||
comm -23 sortedP sortedS > PsetsubtractionS
|
||||
cat PsetsubtractionS | uniq -c | sort -nr > PsetsubtractionS_counted.txt
|
Loading…
Reference in New Issue
Block a user