First test
This commit is contained in:
parent
c0a565f4d0
commit
8522487dc6
|
@ -1,8 +1,8 @@
|
||||||
|
|
||||||
*~
|
*~
|
||||||
*.swp
|
*.swp
|
||||||
*.bak
|
*.bak
|
||||||
*.pyc
|
*.pyc
|
||||||
*.o
|
*.o
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.token
|
.token
|
||||||
|
|
26
README.md
26
README.md
|
@ -1,13 +1,13 @@
|
||||||
Skeptic vs paranormal subreddits
|
Skeptic vs paranormal subreddits
|
||||||
================================
|
================================
|
||||||
|
|
||||||
Classify a reddit as either from Skeptic subreddit or one of the
|
Classify a Reddit post as coming either from the Skeptic subreddit or from one of the
|
||||||
"paranormal" subreddits (Paranormal, UFOs, TheTruthIsHere, Ghosts,
|
"paranormal" subreddits (Paranormal, UFOs, TheTruthIsHere, Ghosts,
|
||||||
,Glitch-in-the-Matrix, conspiracytheories).
|
Glitch-in-the-Matrix, conspiracytheories).
|
||||||
|
|
||||||
Output label is `S` and `P`.
|
The output label is either `S` (Skeptic) or `P` (paranormal).
|
||||||
|
|
||||||
Sources
|
Sources
|
||||||
-------
|
-------
|
||||||
|
|
||||||
Data taken from <https://archive.org/details/2015_reddit_comments_corpus>.
|
Data taken from <https://archive.org/details/2015_reddit_comments_corpus>.
|
||||||
|
|
10544
dev-0/expected.tsv
10544
dev-0/expected.tsv
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1 +1 @@
|
||||||
PostText Timestamp
|
PostText Timestamp
|
||||||
|
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1 +1 @@
|
||||||
Label
|
Label
|
||||||
|
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import pandas as pd
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
# sort | uniq -c
|
||||||
|
#train = pd.read_csv("./train/in.tsv.xz", delimiter='\t')
|
||||||
|
#import sys
|
||||||
|
#for line in sys.stdin
|
||||||
|
#if re.search(r'UFO', line) print("P")
|
||||||
|
# Keywords treated as evidence of a "paranormal" post. They are matched as
# plain substrings of the lower-cased line (so "risky" matches via "sky").
# Compiled once here instead of being passed as a literal on every iteration.
PARANORMAL_PATTERN = re.compile(
    r'(ufo|lol|camera|picture|contact|phenomen|photo|paralysis|haunted|alien|demon|ghost|levitation|paranormal|spirit|telekinesis|flying|fake|sky|dream)'
)


def classify_line(line):
    """Return the label for a single input line.

    Args:
        line: One line of text (a trailing newline is harmless).

    Returns:
        "P" if the lower-cased line contains any of the keywords in
        PARANORMAL_PATTERN, otherwise "S".
    """
    return "P" if PARANORMAL_PATTERN.search(line.lower()) else "S"


def main():
    """Read lines from standard input and print one label per line."""
    for line in sys.stdin:
        print(classify_line(line))


# Guarded so that importing this module does not start consuming stdin.
if __name__ == "__main__":
    main()
|
579158
train/expected.tsv
579158
train/expected.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue