First test
This commit is contained in:
parent
c0a565f4d0
commit
8522487dc6
16
.gitignore
vendored
16
.gitignore
vendored
@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
*~
|
*~
|
||||||
*.swp
|
*.swp
|
||||||
*.bak
|
*.bak
|
||||||
*.pyc
|
*.pyc
|
||||||
*.o
|
*.o
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.token
|
.token
|
||||||
|
26
README.md
26
README.md
@ -1,13 +1,13 @@
|
|||||||
Skeptic vs paranormal subreddits
|
Skeptic vs paranormal subreddits
|
||||||
================================
|
================================
|
||||||
|
|
||||||
Classify a Reddit post as coming from either the Skeptic subreddit or one of the
|
Classify a Reddit post as coming from either the Skeptic subreddit or one of the
|
||||||
"paranormal" subreddits (Paranormal, UFOs, TheTruthIsHere, Ghosts,
|
"paranormal" subreddits (Paranormal, UFOs, TheTruthIsHere, Ghosts,
|
||||||
Glitch-in-the-Matrix, conspiracytheories).
|
Glitch-in-the-Matrix, conspiracytheories).
|
||||||
|
|
||||||
The output label is either `S` (Skeptic) or `P` (paranormal).
|
The output label is either `S` (Skeptic) or `P` (paranormal).
|
||||||
|
|
||||||
Sources
|
Sources
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Data taken from <https://archive.org/details/2015_reddit_comments_corpus>.
|
Data taken from <https://archive.org/details/2015_reddit_comments_corpus>.
|
||||||
|
10544
dev-0/expected.tsv
10544
dev-0/expected.tsv
File diff suppressed because it is too large
Load Diff
0
dev-0/mostUsedP.txt
Normal file
0
dev-0/mostUsedP.txt
Normal file
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
|||||||
PostText Timestamp
|
PostText Timestamp
|
||||||
|
|
1500
mostUsed.txt
Normal file
1500
mostUsed.txt
Normal file
File diff suppressed because it is too large
Load Diff
1525
mostUsedP.txt
Normal file
1525
mostUsedP.txt
Normal file
File diff suppressed because it is too large
Load Diff
1531
mostUsedS.txt
Normal file
1531
mostUsedS.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
|||||||
Label
|
Label
|
||||||
|
|
14
solve.py
Normal file
14
solve.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
# sort | uniq -c
|
||||||
|
#train = pd.read_csv("./train/in.tsv.xz", delimiter='\t')
|
||||||
|
#import sys
|
||||||
|
#for line in sys.stdin
|
||||||
|
#if re.search(r'UFO', line) print("P")
|
||||||
|
# Keyword stems that mark a post as "paranormal". They are matched as plain
# substrings of the lower-cased line, so e.g. "sky" also hits "risky"; this
# mirrors the original heuristic on purpose.
PARANORMAL_PATTERN = re.compile(
    r'ufo|lol|camera|picture|contact|phenomen|photo|paralysis|haunted|alien'
    r'|demon|ghost|levitation|paranormal|spirit|telekinesis|flying|fake|sky'
    r'|dream'
)


def classify(line):
    """Return the predicted label for one input line.

    Args:
        line: Raw text of a single post (a trailing newline is harmless).

    Returns:
        "P" if the lower-cased line contains any keyword stem from
        PARANORMAL_PATTERN, otherwise "S".
    """
    return "P" if PARANORMAL_PATTERN.search(line.lower()) else "S"


def main():
    """Read posts from stdin, one per line, and print one label per line."""
    for line in sys.stdin:
        print(classify(line))


# Guarded so the module can be imported (e.g. for testing) without
# consuming stdin; running it as a script behaves as before.
if __name__ == "__main__":
    main()
|
579158
train/expected.tsv
579158
train/expected.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user