-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtraining.py
More file actions
75 lines (58 loc) · 2.09 KB
/
training.py
File metadata and controls
75 lines (58 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import random
import json
import pickle
import numpy as np
from tqdm import tqdm, trange
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
lemmatizer = WordNetLemmatizer()
intents = json.loads(open("intents.json").read())
words = []
classes = []
types = []
documents = []
ignore_letters = ["?", "!", ".", ","]
for intent in tqdm(intents["intents"]):
for pattern in intent["patterns"]:
word_list = nltk.word_tokenize(pattern)
words.extend(word_list)
documents.append((word_list, intent["tag"]))
if intent["tag"] not in classes:
classes.append(intent["tag"])
types.append(intent["type"])
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))
pickle.dump(words, open("models/words.pkl", "wb"))
pickle.dump(classes, open("models/classes.pkl", "wb"))
pickle.dump(types, open("models/types.pkl", "wb"))
training = []
output_empty = [0] * len(classes)
for document in tqdm(documents):
bag = []
word_patterns = document[0]
word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
for word in words:
bag.append(1) if word in word_patterns else bag.append(0)
output_row = list(output_empty)
output_row[classes.index(document[1])] = 1
training.append([bag, output_row])
random.shuffle(training)
training = np.array(training)
train_x = list(training[:, 0])
train_y = list(training[:, 1])
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
hist = model.fit(
np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1
)
model.save("models/chatbotmodel.h5", hist)
print("Done")