57 lines
1.1 KiB
Python
57 lines
1.1 KiB
Python
import sys
|
|
import numpy as np
|
|
|
|
# Formatted "word:logprob" strings appended by logic() and handed to logger().
log = []
# Number of times logic() prints the accumulated log.
line = 0

# One token list per input line; filled in place by makeUnigrams().
unigrams = []
# Token -> occurrence count; filled by countWordsAndKeys().
unigramsF = {}

# Starting tallies handed to countWordsAndKeys() by the script entry point.
unigramKeys = 0
unigramWords = 0
|
|
|
|
|
|
def makeUnigrams(unigrams):
    """Read standard input and collect the tokens of every line.

    Each line is lower-cased and split on whitespace. The first two fields
    are dropped and the remaining tokens are appended to *unigrams* as one
    list per line. The module-level ``line`` counter is incremented once
    for every line read.

    Args:
        unigrams: List that receives one token list per input line
            (mutated in place).
    """
    # The original incremented the line text itself (``i += 1``), which
    # raises TypeError (str + int) on the first line. The increment
    # presumably belongs to the module-level ``line`` counter, which is
    # what logic() reads to decide how many times to print.
    global line
    for rawLine in sys.stdin:
        line += 1
        unigrams.append(rawLine.lower().split()[2:])
|
|
|
|
|
|
def countWordsAndKeys(keys, words, unigrams):
    """Tally token frequencies into ``unigramsF`` and return the new counts.

    Args:
        keys: Starting count of distinct tokens.
        words: Starting count of processed tokens.
        unigrams: Iterable of token sequences.

    Returns:
        A ``(keys, words)`` tuple: *keys* increased by the number of tokens
        that were not yet in ``unigramsF``, and *words* increased by the
        total number of tokens processed.
    """
    for tokens in unigrams:
        for token in tokens:
            if token in unigramsF:
                unigramsF[token] += 1
            else:
                unigramsF[token] = 1
                keys += 1
            words += 1
    # ints are immutable, so rebinding the parameters never reaches the
    # caller; the updated tallies have to be returned explicitly.
    return keys, words
|
|
|
|
|
|
def logger(x):
    """Print the strings in *x* on a single line.

    Every item is followed by one space (including the last one), so the
    printed line ends with a trailing space before the newline.

    Args:
        x: Iterable of strings.
    """
    # A single join replaces repeated ``+=`` string building; the per-item
    # trailing space is kept so the output stays byte-identical.
    print("".join(item + " " for item in x))
|
|
|
|
|
|
def logic(sortedUnigrams, val, topN=100):
    """Append log-probability entries to ``log`` and print the result.

    The first *topN* ``(word, count)`` pairs each produce an entry
    ``"word:log((count + 1) / val)"``. All remaining pairs are pooled into
    one final entry with an empty word, ``":log(sum(count + 1) / val)"``.
    The accumulated ``log`` list is then passed to ``logger`` once for each
    unit of the module-level ``line`` counter.

    Args:
        sortedUnigrams: Sequence of ``(word, count)`` pairs, already in the
            order in which they should be reported.
        val: Denominator applied to every ``count + 1`` numerator.
        topN: Number of leading pairs reported individually (default 100,
            the value that used to be hard-coded).

    Raises:
        ZeroDivisionError: If *val* is 0.
    """
    head = sortedUnigrams[:topN]
    tail = sortedUnigrams[topN:]

    for word, count in head:
        logProb = np.log(float(int(count) + 1) / val)
        log.append(str(word) + ":" + str(logProb))

    tailMass = sum(count + 1 for _, count in tail)
    # An empty tail gives log(0) == -inf; that value is still reported, but
    # numpy's divide-by-zero RuntimeWarning is silenced for this one call.
    with np.errstate(divide="ignore"):
        tailLogProb = np.log(float(tailMass) / val)
    log.append(":" + str(tailLogProb))

    for _ in range(line):
        logger(log)
|
|
|
|
|
|
if __name__ == '__main__':
    makeUnigrams(unigrams)
    # Called for its side effect of filling unigramsF. The int arguments are
    # passed by value, so unigramKeys / unigramWords are still 0 afterwards.
    countWordsAndKeys(unigramKeys, unigramWords, unigrams)
    # Denominator for the "+ 1" numerators used in logic(): total token
    # count plus the number of distinct tokens, read from the frequency
    # table itself rather than from the never-updated module counters
    # (whose sum is always 0 and made logic() divide by zero).
    smoothedTotal = sum(unigramsF.values()) + len(unigramsF)
    # Nothing was counted (empty input): skip the report instead of
    # dividing by zero.
    if smoothedTotal:
        logic(
            sorted(unigramsF.items(), key=lambda x: x[1], reverse=True),
            smoothedTotal,
        )
|