aitech-eks-pub/cw/03a_tfidf_tasks_ODPOWIEDZI....

1.6 KiB

def word_to_index(word):
    """Return a one-hot vector locating ``word`` in the module-level ``vocabulary``.

    The vector has ``len(vocabulary)`` entries, all zero except for a single 1.
    A word found in ``vocabulary`` sets the entry at its position there; any
    other word sets the last entry instead.
    """
    one_hot = np.zeros(len(vocabulary))
    # NOTE(review): presumably the final vocabulary slot is reserved for
    # out-of-vocabulary words -- confirm where ``vocabulary`` is built.
    position = vocabulary.index(word) if word in vocabulary else -1
    one_hot[position] = 1
    return one_hot
def tf(document):
    """Return the sum of the one-hot vectors of every word in ``document``.

    Each word is converted with ``word_to_index`` and the results are added
    together, so every entry of the result counts how many words mapped to
    that position. Returns ``None`` when ``document`` yields no words.
    """
    counts = None
    for token in document:
        one_hot = word_to_index(token)
        if counts is not None:
            counts += one_hot
        else:
            # The first word's vector becomes the accumulator.
            counts = one_hot
    return counts
def similarity(query, document):
    """Return the cosine similarity between ``query`` and ``document``.

    The result is the sum of the element-wise products of the two inputs
    divided by the product of their Euclidean norms.

    When either input has zero norm the ratio is undefined; 0.0 is returned
    in that case instead of dividing by zero (which would yield ``nan`` and a
    NumPy RuntimeWarning).
    """
    numerator = np.sum(query * document)
    denominator = np.sqrt(np.sum(query * query)) * np.sqrt(np.sum(document * document))
    # A zero-norm vector has no direction, so treat it as having no similarity.
    if denominator == 0:
        return 0.0
    return numerator / denominator