diff --git a/.predict.py.swp b/.predict.py.swp index bd51602..30a67ed 100644 Binary files a/.predict.py.swp and b/.predict.py.swp differ diff --git a/.train.py.swp b/.train.py.swp index b1dcd49..7b698e4 100644 Binary files a/.train.py.swp and b/.train.py.swp differ diff --git a/dev-0/out.tsv b/dev-0/out.tsv index 76d3b4f..c4f333d 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -1401,3 +1401,3872 @@ S S S + S + S + S + P + P + S + S + P + S + S + P + S + S + S + S + P + P + S + S + S + S + P + P + S + S + P + S + S + S + S + S + S + P + P + S + S + P + P + P + S + S + P + S + P + P + S + S + S + P + P + P + S + S + S + S + P + S + S + S + P + P + S + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + S + P + S + S + P + P + S + S + S + P + S + S + S + S + S + S + P + S + P + P + P + S + S + S + S + S + P + P + S + S + P + S + P + S + P + S + P + P + S + S + S + S + P + S + S + S + S + P + P + P + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + P + P + S + P + S + S + S + P + S + P + P + S + P + S + S + P + S + P + S + S + S + P + S + S + P + P + S + S + S + P + P + S + S + P + S + P + P + P + S + S + S + S + S + S + S + S + S + P + S + S + S + S + P + P + S + P + P + P + S + S + P + S + S + P + S + P + S + P + S + S + S + S + P + S + S + P + S + S + P + S + P + S + P + S + S + S + P + S + S + S + S + S + P + S + P + P + S + P + S + S + S + P + S + P + S + P + P + S + P + S + S + P + S + P + P + S + P + S + P + S + S + S + S + S + S + S + P + P + S + S + S + S + S + S + P + P + S + P + P + S + S + S + P + P + P + S + P + S + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + S + P + P + P + S + S + P + S + S + S + S + P + S + S + P + P + S + S + P + S + S + S + S + S + S + S + P + S + S + P + S + S + S + S + S + S + S + S + P + P + P + P + P + S + P + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + S + S + P + S + S + P + P + S + S + S + S + S + S + S + P + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + S + P + S + P + S + S + P + P + P + S + S + S + S + S + S + P + S + S + P + P + P + P + S + P + P + S + P + S + S + S + S + P + S + S + S + P + S + S + P + P + P + S + P + S + P + S + P + S + S + S + S + P + S + P + S + S + S + S + P + S + S + P + S + S + S + S + P + S + S + P + S + S + S + S + S + P + S + S + P + P + S + P + S + S + S + S + P + P + P + S + S + S + S + P + S + P + S + P + S + S + S + S + P + S + S + S + P + S + S + S + S + S + S + S + S + P + P + P + P + S + S + S + S + S + S + S + S + S + P + P + S + S + S + S + P + S + P + S + P + S + S + P + S + S + S + S + P + P + P + S + P + S + S + P + S + S + P + P + P + S + P + S + S + S + S + P + P + P + P + P + P + S + P + S + S + S + P + P + S + P + P + S + S + S + P + S + P + P + P + S + S + P + S + P + S + S + P + S + S + P + S + P + S + S + S + S + S + P + S + S + S + P + S + P + S + S + S + P + S + S + S + S + P + P + P + P + S + S + S + P + P + S + P + P + P + P + S + S + S + S + S + P + S + S + S + S + S + S + S + P + S + S + S + P + S + P + S + S + P + S + S + S + S + S + P + P + P + S + P + S + P + S + P + S + S + P + S + S + S + S + S + S + P + S + S + S + P + S + S + P + S + S + S + P + P + P + S + P + S + S + P + S + S + S + S + S + S + P + S + S + P + S + S + S + S + S + S + P + P + P + S + S + S + P + P + S + S + S + P + P + P + S + P + S + S + S + S + S + P + S + S + P + P + S + S + S + S + S + S + S + S + S + P + P + P + P + P + S + P + S + S + S + S + P + S + P + S + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + S + S + P + P + S + P + P + P + P + S + S + P + P + S + S + S + S + S + P + P + S + S + S + P + S + P + S + S + S + P + S + S + P + S + S + S + P + S + S + P + P + S + S + S + S + P + P + P + S + P + S + P + P + P + S + P + S + S + P + S + S + S + S + S + S + S + S + S + P + S + S + S + P + S + P + P + P + P + S + S + P + P + S + S + P + P + P + S + S + S + S + S + P + S + S + P + P + P + S + S + S + S + S + S + S + S + P + S + S + S + P + S + S + S + S + P + S + S + S + P + S + S + S + S + P + S + S + S + P + S + S + S + P + S + P + S + S + S + P + P + P + S + P + P + P + S + S + P + P + P + P + S + S + P + S + S + P + S + S + S + P + P + S + S + S + S + S + P + P + S + S + S + P + P + S + S + P + S + P + S + P + P + P + P + S + P + P + P + S + P + S + S + S + P + S + S + S + P + S + S + S + S + S + S + P + S + P + S + S + S + P + S + S + S + P + P + P + S + P + S + S + P + P + S + S + S + S + S + S + S + S + S + P + S + P + P + P + S + S + S + P + P + P + S + P + S + P + S + S + S + S + P + S + S + S + S + S + S + P + P + P + P + S + S + S + P + S + S + S + S + S + S + P + S + P + S + S + S + P + S + P + S + S + S + P + S + S + P + P + S + S + P + S + P + S + S + P + P + S + S + P + S + P + P + S + P + S + S + S + S + P + S + P + P + S + S + S + S + S + S + P + S + P + S + P + S + S + P + P + S + S + S + P + S + S + S + S + P + S + S + P + P + S + S + S + P + S + S + P + S + S + P + S + S + S + S + P + S + P + P + P + P + P + P + S + S + S + P + P + S + S + P + S + P + S + S + S + S + S + S + P + S + S + P + S + S + S + S + S + S + P + P + P + S + S + P + S + P + S + S + S + P + P + P + P + S + S + P + S + S + S + S + S + P + P + P + P + S + S + S + P + P + P + S + S + S + S + S + P + S + P + S + S + S + P + S + S + S + S + S + S + P + P + S + S + S + S + P + P + P + S + S + S + P + S + S + P + S + P + S + S + S + S + S + S + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + P + P + S + S + S + P + P + S + P + S + P + S + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + P + S + P + S + S + P + P + S + S + P + S + P + S + S + P + P + S + P + S + S + S + P + S + P + S + S + P + S + P + S + S + S + S + S + S + S + S + S + S + S + P + P + P + P + P + P + S + S + P + S + S + P + S + S + P + P + S + S + P + S + S + S + S + S + S + S + S + S + P + P + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + P + P + S + S + S + S + P + S + S + P + S + S + P + S + S + S + S + S + P + P + S + S + P + P + S + P + S + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + S + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + P + S + P + P + S + S + S + S + S + P + S + P + S + S + P + P + S + S + S + S + S + P + S + S + P + P + P + S + S + P + P + S + P + S + P + S + S + S + S + S + S + P + S + S + P + P + P + S + P + S + S + S + S + P + S + S + P + S + S + S + P + S + S + S + P + P + S + S + S + P + S + P + P + P + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + P + S + S + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + P + P + S + S + S + S + P + S + S + S + S + S + S + P + P + S + S + P + S + S + S + S + P + S + S + S + S + P + P + P + S + P + S + P + P + P + P + S + P + S + P + S + S + P + S + P + S + S + S + S + S + P + P + S + S + S + S + P + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + S + P + S + S + S + P + S + S + S + P + S + P + P + S + P + P + S + S + P + S + S + P + S + S + S + S + S + S + P + S + S + S + S + P + P + S + S + P + S + S + P + P + S + S + S + S + P + S + P + P + S + S + S + S + S + S + S + P + S + P + S + P + P + S + S + S + P + S + P + P + S + P + S + P + S + P + S + S + S + S + S + S + P + S + S + S + S + P + P + S + P + S + P + S + S + P + P + S + P + S + S + S + P + P + S + P + P + P + S + S + P + S + S + S + P + S + S + S + P + P + P + S + S + P + P + S + S + P + P + P + S + S + S + S + P + S + S + P + S + S + S + P + P + S + S + P + S + P + P + S + S + S + S + S + S + S + P + S + S + S + S + S + S + S + P + S + S + S + P + S + S + P + S + S + S + S + S + P + S + S + S + S + S + P + S + S + S + S + P + S + S + S + S + S + P + S + S + P + P + S + S + S + S + S + P + S + P + S + S + S + S + S + S + P + S + S + P + S + S + S + S + P + P + S + S + S + P + S + P + S + S + S + P + S + S + S + S + S + P + S + S + S + S + S + S + P + S + P + P + P + P + P + S + S + S + S + S + S + S + S + P + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + P + S + P + S + P + S + S + S + S + S + P + P + S + P + S + S + P + P + S + P + S + S + P + S + S + P + S + S + S + S + S + S + P + P + S + S + P + S + P + P + S + P + S + P + S + P + P + S + S + S + P + S + P + P + P + S + S + S + S + S + S + S + P + S + P + S + P + P + S + S + S + P + S + S + P + S + S + S + P + S + S + S + P + P + S + S + S + P + S + S + P + S + S + S + S + S + S + P + P + P + S + S + P + S + S + S + S + P + P + P + P + S + S + P + P + S + P + S + P + P + P + S + S + S + P + S + S + P + S + S + S + S + P + S + S + S + S + P + S + P + P + S + P + S + S + P + S + S + S + S + S + S + S + P + P + S + P + P + P + S + S + P + P + P + S + P + P + S + S + S + S + P + S + S + S + S + S + P + S + S + S + P + S + S + S + S + S + P + S + S + S + P + S + P + S + P + P + S + P + S + S + P + S + S + P + S + S + S + S + S + S + S + S + P + P + P + S + S + S + P + P + S + S + S + S + S + S + S + P + P + P + S + P + S + S + P + S + S + S + S + S + P + P + P + S + S + P + S + S + S + P + S + S + S + P + P + P + S + P + P + S + S + P + P + S + P + S + P + P + S + S + P + P + S + S + S + P + S + S + S + S + S + S + P + S + S + P + S + P + P + P + P + S + S + S + P + S + S + P + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + S + S + S + P + S + P + S + S + P + S + P + P + P + S + P + P + S + S + S + P + S + S + S + S + P + S + P + P + P + S + S + S + S + S + S + S + S + S + S + P + S + S + P + S + S + S + S + S + P + S + S + P + S + S + P + S + S + S + S + P + P + P + S + S + P + S + P + S + S + S + S + S + P + S + S + S + S + P + P + S + S + S + P + P + P + S + S + S + S + S + P + P + P + S + S + S + S + P + P + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + P + S + P + P + S + S + P + P + P + S + P + P + S + S + S + S + S + P + S + P + P + S + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + S + S + P + S + S + P + S + S + S + S + P + S + S + S + P + P + S + S + S + S + P + S + P + S + S + S + S + S + S + P + S + P + S + P + S + S + S + P + P + S + S + P + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + S + S + S + S + P + P + S + P + S + S + S + P + S + S + S + P + S + P + P + S + S + S + P + P + P + S + S + S + S + P + P + S + S + S + S + S + P + S + P + S + S + S + P + P + P + S + P + S + S + P + P + S + P + P + S + P + S + P + P + S + S + S + S + S + S + P + S + S + S + S + P + P + P + S + S + S + S + P + P + S + S + S + S + S + P + P + S + P + P + S + S + S + P + S + S + S + S + P + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + P + P + S + S + S + S + S + S + P + S + S + S + S + S + S + S + P + S + P + P + S + S + S + S + S + S + S + P + S + S + S + S + P + P + S + P + S + S + S + S + S + P + S + S + S + S + P + S + S + S + S + S + S + P + P + S + S + S + P + S + S + S + P + P + S + S + P + S + S + P + P + S + S + S + S + S + S + S + P + S + S + S + S + P + S + S + S + S + S + S + P + P + P + P + S + S + P + S + S + S + P + P + S + S + S + P + S + S + S + S + P + P + S + S + P + P + P + P + P + P + S + P + S + S + S + P + P + P + S + P + S + S + S + S + S + S + S + P + P + P + S + S + P + S + S + P + S + S + S + S + S + S + P + S + P + S + S + S + P + P + P + S + P + S + S + P + S + P + P + S + S + S + P + S + S + P + S + S + S + S + S + S + S + S + S + P + P + S + S + S + S + P + S + S + S + S + S + S + P + S + S + P + P + P + P + S + P + P + S + S + P + S + S + S + S + P + P + P + P + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + P + S + S + S + S + P + P + P + P + S + S + S + P + S + P + S + S + S + P + P + S + P + S + P + S + S + S + P + S + P + S + P + P + S + P + P + S + P + S + S + S + P + P + S + S + S + S + P + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + P + P + S + P + S + S + P + S + P + S + S + P + P + P + S + S + S + S + S + S + S + P + P + S + S + S + S + S + S + S + P + P + S + P + S + S + S + S + S + S + S + S + S + S + P + P + S + S + P + S + S + S + S + P + P + S + P + P + P + S + S + S + P + S + S + S + S + S + S + S + S + P + P + P + P + S + S + S + S + S + S + P + S + S + S + S + S + S + S + S + P + S + S + P + S + S + S + S + P + S + P + P + S + P + S + P + P + P + S + P + S + S + S + S + S + S + S + S + P + S + S + P + S + S + P + S + S + P + S + S + S + S + S + S + S + P + P + S + P + P + S + P + S + S + P + P + S + P + S + S + S + S + S + S + P + P + P + P + P + P + P + S + P + S + P + S + S + S + S + P + S + S + P + S + S + S + P + S + S + P + S + P + P + S + S + S + S + S + S + S + S + S + S + P + S + P + S + S + S + S + S + S + S + S + S + P + S + S + S + P + S + P + P + P + P + S + P + P + S + P + S + S + S + S + P + S + P + S + S + P + S + P + S + S + P + S + P + P + P + P + S + S + P + S + P + S + S + P + S + S + P + S + S + S + P + S + S + S + P + P + S + P + S + P + S + S + P + S + S + S + S + P + S + S + S + S + S + P + P + S + S + S + P + P + S + P + S + P + P + S + S + S + S + P + S + S + P + P + S + S + P + S + S + P + S + S + S + S + S + S + S + S + P + S + P + P + P + S + P + S + S + S + S + S + S + P + S + P + P + S + S + P + S + S + S + S + P + S + P + S + S + S + S + S + S + S + S + P + S + S + S + P + S + S + P + S + P + S + S + S + P + S + S + P + P + P + P + S + P + P + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + P + S + S + S + P + S + P + S + P + P + S + S + S + S + S + P + P + P + S + S + S + S + P + S + P + S + P + S + P + S + P + S + P + P + P + S + P + S + P + S + S + P + P + S + S + S + S + S + P + P + S + S + S + P + S + P + P + P + S + S + S + S + S + P + P + S + S + S + S + S + S + P + S + S + P + S + S + P + P + S + S + P + S + P + P + S + S + S + S + S + S + S + S + S + S + S + S + P + P + S + S + S + S + P + P + S + P + P + S + S + S + S + S + S + S + S + P + S + P + S + S + P + S + S + S + P + P + S + S + P + P + S + S + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + S + P + S + P + S + P + S + S + S + S + S + P + P + S + S + S + P + P + P + S + P + P + S + P + S + S + P + S + S + P + S + S + P + S + S + S + P + S + P + S + S + S + S + S + S + S + S + P + S + S + P + P + S + P + P + P + P + S + S + S + P + S + S + P + S + P + S + S + S + P + P + S + P + S + P + P + S + S + S + P + P + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + P + S + S + P + P + P + P + P + S + S + S + S + P + S + P + P + S + S + S + P + S + P + S + S + P + P + S + S + S + P + P + S + S + P + S + P + S + P + P + S + S + P + S + S + S + P + S + S + P + S + S + S + P + P + S + S + S + S + P + S + P + S + P + P + P + S + S + P + S + S + S + P + S + S + S + S + P + S + S + P + S + S + S + P + S + S + S + P + S + S + P + S + P + P + S + S + S + P + P + P + P + S + S + P + P + S + S + P + S + S + P + P + S + S + S + S + P + P + S + S + S + S + P + P + S + S + P + S + P + S + S + S + S + S + S + S + P + S + S + S + S + S + P + P + S + P + S + S + S + P + S + S + S + S + S + P + S + S + S + S + S + S + S + S + S + P + P + P + S + S + S + P + S + P + P + S + P + S + S + P + S + P + S + P + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + S + S + P + P + S + S + P + S + S + S + S + P + P + S + P + P + S + P + S + S + P + P + S + P + P + S + S + S + P + S + S + S + S + S + S + S + S + S + S + S + S + S + P + S + S + S + S + P + S + P + S + S + S + P + S + P + S + S + S + S + S + P + P + P + P + S + P + P + P + S + P + S + P + P + P + P + P + S + S + P + P + S + S + S + S + S + P + S + S + S + S + S + S + P + P + S + S + S + S + S + S + S + P + P + S + S + P + S + P + P + S + P + P + S + P + P + S + S + S + S + S + S + S + S + S + S + P + S + S + P + S + P + S + S + S + S + S + P + S + P + S diff --git a/naive_base_model.pkl b/naive_base_model.pkl index 4a503b2..784759f 100644 Binary files a/naive_base_model.pkl and b/naive_base_model.pkl differ diff --git a/predict.py b/predict.py index 857f1dd..4ddf6a4 100755 --- a/predict.py +++ b/predict.py @@ -53,10 +53,10 @@ def main(): paranormal_class_logprob = pickle_list[0] sceptic_class_logprob = pickle_list[1] word_logprobs = pickle_list[2] - in_file = "test-A/in.tsv" - #in_file = "dev-0/in.tsv" - out_file = "test-A/out.tsv" - #out_file = "dev-0/out.tsv" + #in_file = "test-A/in.tsv" + in_file = "dev-0/in.tsv" + #out_file = "test-A/out.tsv" + out_file = "dev-0/out.tsv" print (f"in {in_file}") print (f"out {out_file}") with open(in_file) as in_f, open(out_file, 'w') as out_f: diff --git a/train.py b/train.py index 0773add..2394882 100755 --- a/train.py +++ b/train.py @@ -73,10 +73,10 @@ def calc_word_logprobs(word_counts): return word_logprobs def main(): - expected = './train/expected.tsv' - #expected = './dev-0/expected.tsv' - in_f = './train/in.tsv' - #in_f = './dev-0/in.tsv' + #expected = './train/expected.tsv' + expected = './dev-0/expected.tsv' + #in_f = './train/in.tsv' + in_f = './dev-0/in.tsv' print (f"expected {expected}") print (f"in {in_f}") paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected)