import json
import pickle
import random

import nltk
import numpy as np
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import SGD
from nltk.stem import WordNetLemmatizer

# Fetch the tokenizer and WordNet data used below; skipped if already installed.
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

lemmatizer = WordNetLemmatizer()

# Containers for the vocabulary, the intent labels, and (token list, tag) pairs.
words = []
classes = []
documents = []
ignore_words = ['?', '!']

# Load the training patterns and their intent tags.
data_file = open('intents.json', encoding='utf-8').read()
intents = json.loads(data_file)

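# The loop below reads intent['patterns'] and intent['tag'], so intents.json is
# assumed to be a JSON list of intent objects, e.g. (illustrative values only):
# [
#     {"tag": "greeting",
#      "patterns": ["Hi", "Hello", "How are you?"],
#      "responses": ["Hello!", "Hi there."]}
# ]
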
# Tokenize every pattern, grow the vocabulary, and record each (tokens, tag) pair.
for intent in intents:
    for pattern in intent['patterns']:
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        documents.append((w, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lemmatize and lowercase the vocabulary, drop punctuation, and deduplicate.
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
classes = sorted(set(classes))

print(len(documents), "documents")                    # (tokens, tag) pairs
print(len(classes), "classes", classes)               # intent labels
print(len(words), "unique lemmatized words", words)   # vocabulary

# Persist the vocabulary and labels for reuse at inference time.
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

# Build the training set: one bag-of-words vector and one one-hot label per pattern.
training = []
for doc in documents:
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    bag = [1 if w in pattern_words else 0 for w in words]
    output_row = [0] * len(classes)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
# Split into feature and label arrays; bag and output_row have different lengths,
# so stacking the pairs into a single np.array would need dtype=object on recent NumPy.
train_x = np.array([item[0] for item in training])
train_y = np.array([item[1] for item in training])
print("Training data created")

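# Worked example of the encoding above, with made-up values: if
# words = ['hello', 'how', 'you'] and classes = ['goodbye', 'greeting'],
# the pattern "hello you" with tag 'greeting' becomes
# bag = [1, 0, 1] and output_row = [0, 1].
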
# Feed-forward classifier: bag-of-words in, softmax over intent classes out.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(learning_rate=0.005, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)
model.save('chatbot_model.h5')
print("model created")
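
# A minimal inference sketch (not part of the original script): reload the artifacts
# saved above and classify one sentence with the same bag-of-words encoding. The
# helper name predict_intent and the example sentence are illustrative only.
from keras.models import load_model

def predict_intent(sentence):
    vocab = pickle.load(open('words.pkl', 'rb'))
    labels = pickle.load(open('classes.pkl', 'rb'))
    clf = load_model('chatbot_model.h5')
    tokens = [lemmatizer.lemmatize(t.lower()) for t in nltk.word_tokenize(sentence)]
    bag = np.array([[1 if w in tokens else 0 for w in vocab]])
    probs = clf.predict(bag, verbose=0)[0]
    return labels[int(np.argmax(probs))]

# Example: print(predict_intent("Hello there"))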