Chatbot: intent classification and dialog management
In [1]:
import json
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
#import random
import numpy as np
import pickle
import tflearn
import tensorflow.compat.v1 as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input, Dropout, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
WARNING:tensorflow:From /opt/anaconda3/lib/python3.9/site-packages/tensorflow/python/compat/v2_compat.py:107: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term
Language understanding¶
Load data¶
In [2]:
# Read the intents file and extract the training utterances and their tags.
# `with` guarantees the file handle is closed (the original leaked it), and
# json.load reads the stream directly instead of json.loads(f.read()).
with open("intents.json") as f:
    file = json.load(f)

text = []   # one training utterance (pattern) per entry
label = []  # intent tag for each utterance (parallel to `text`)
for item in file['intents']:
    for text_id in item['patterns']:
        text.append(text_id)
        label.append(item['tag'])

print(text)
print(label)
['Hi there', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Bye', 'See you later', 'Goodbye', 'Nice chatting to you, bye', 'Till next time', 'Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me', 'How you could help me?', 'What you can do?', 'What help you provide?', 'How you can be helpful?', 'What support is offered', 'How to check Adverse drug reaction?', 'Open adverse drugs module', 'Give me a list of drugs causing adverse behavior', 'List all drugs suitable for patient with adverse reaction', 'Which drugs dont have adverse reaction?', 'Open blood pressure module', 'Task related to blood pressure', 'Blood pressure data entry', 'I want to log blood pressure results', 'Blood pressure data management', 'I want to search for blood pressure result history', 'Blood pressure for patient', 'Load patient blood pressure result', 'Show blood pressure results for patient', 'Find blood pressure results by ID', 'Find me a pharmacy', 'Find pharmacy', 'List of pharmacies nearby', 'Locate pharmacy', 'Search pharmacy', 'Lookup for hospital', 'Searching for hospital to transfer patient', 'I want to search hospital data', 'Hospital lookup for patient', 'Looking up hospital details'] ['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'options', 'options', 'options', 'options', 'options', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'hospital_search', 'hospital_search', 'hospital_search', 'hospital_search', 'hospital_search']
Data Pre-processing¶
In [3]:
#dividing the data into training and testing data by a ratio of 80:20
# NOTE(review): dead code — the split below is quoted out, so the model cell
# trains on ALL the data and the evaluate call there is commented out too.
# Either delete this cell or re-enable the split (ideally with shuffling,
# since the data is grouped by tag).
'''size = int(len(text)*0.8)
split_text = [text[i:i + size] for i in range(0, len(text), size)]
split_label = [label[i:i + size] for i in range(0, len(text), size)]
train_text_data = split_text[0]
test_text_data = split_text[1]
train_label_data = split_label[0]
test_label_data = split_label[1]'''
Out[3]:
'size = int(len(text)*0.8)\nsplit_text = [text[i:i + size] for i in range(0, len(text), size)]\nsplit_label = [label[i:i + size] for i in range(0, len(text), size)]\n\ntrain_text_data = split_text[0]\ntest_text_data = split_text[1]\ntrain_label_data = split_label[0]\ntest_label_data = split_label[1]'
In [4]:
# Bag-of-words vectorization of the training utterances.
words = []           # every token seen, in order (kept for parity/debugging)
new_sentences = []   # lemmatized form of each utterance
lemmatizer = WordNetLemmatizer()
for utterance in text:
    tokens = nltk.word_tokenize(utterance.lower())
    words.extend(tokens)
    new_sentences.append(' '.join(lemmatizer.lemmatize(tok) for tok in tokens))

# Fit the vocabulary on the lemmatized utterances, then encode both the
# utterances and their tags as count vectors over that same vocabulary.
vectorizer = CountVectorizer()
vectorizer.fit(new_sentences)
train_text = vectorizer.transform(new_sentences).toarray()
train_label = vectorizer.transform(label).toarray()

print(len(train_text[0]))
82
Model¶
In [5]:
# Intent classifier: bag-of-words input -> two hidden ReLU layers -> softmax
# over the label-vocabulary positions produced by the vectorizer cell.
model = Sequential([
    Dense(128, input_shape=(len(train_text[0]),), activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(len(train_label[0]), activation='softmax'),
])

adam = tf.keras.optimizers.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Collapse each label row to a single integer class id, as required by the
# sparse categorical loss. Note this rebinds `train_label` in place.
train_label = np.argmax(train_label, axis=1)
model.fit(train_text, train_label, epochs=100, batch_size=32, verbose=1)
Train on 45 samples Epoch 1/100 45/45 [==============================] - 0s 1ms/sample - loss: 4.3852 - acc: 0.1556 Epoch 2/100 45/45 [==============================] - 0s 68us/sample - loss: 3.5261 - acc: 0.7778 Epoch 3/100 45/45 [==============================] - 0s 77us/sample - loss: 2.2355 - acc: 0.7778 Epoch 4/100 45/45 [==============================] - 0s 73us/sample - loss: 0.9877 - acc: 0.7778 Epoch 5/100 45/45 [==============================] - 0s 74us/sample - loss: 0.7807 - acc: 0.7778 Epoch 6/100 45/45 [==============================] - 0s 78us/sample - loss: 0.6781 - acc: 0.7778 Epoch 7/100 45/45 [==============================] - 0s 71us/sample - loss: 0.5063 - acc: 0.8000 Epoch 8/100 45/45 [==============================] - 0s 73us/sample - loss: 0.3235 - acc: 0.8000 Epoch 9/100 45/45 [==============================] - 0s 81us/sample - loss: 0.3077 - acc: 0.8667 Epoch 10/100 45/45 [==============================] - 0s 79us/sample - loss: 0.2736 - acc: 0.9556 Epoch 11/100 45/45 [==============================] - 0s 79us/sample - loss: 0.1775 - acc: 0.9778 Epoch 12/100 45/45 [==============================] - 0s 73us/sample - loss: 0.1675 - acc: 0.9556 Epoch 13/100 45/45 [==============================] - 0s 82us/sample - loss: 0.1568 - acc: 0.9111 Epoch 14/100 45/45 [==============================] - 0s 82us/sample - loss: 0.1002 - acc: 1.0000 Epoch 15/100 45/45 [==============================] - 0s 75us/sample - loss: 0.0962 - acc: 1.0000 Epoch 16/100 45/45 [==============================] - 0s 80us/sample - loss: 0.0548 - acc: 1.0000 Epoch 17/100 45/45 [==============================] - 0s 83us/sample - loss: 0.0403 - acc: 1.0000
2023-06-02 10:27:13.978107: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Epoch 18/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0394 - acc: 1.0000 Epoch 19/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0248 - acc: 1.0000 Epoch 20/100 45/45 [==============================] - 0s 88us/sample - loss: 0.0158 - acc: 1.0000 Epoch 21/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0062 - acc: 1.0000 Epoch 22/100 45/45 [==============================] - 0s 98us/sample - loss: 0.0093 - acc: 1.0000 Epoch 23/100 45/45 [==============================] - 0s 96us/sample - loss: 0.0072 - acc: 1.0000 Epoch 24/100 45/45 [==============================] - 0s 102us/sample - loss: 0.0053 - acc: 1.0000 Epoch 25/100 45/45 [==============================] - 0s 84us/sample - loss: 0.0014 - acc: 1.0000 Epoch 26/100 45/45 [==============================] - 0s 105us/sample - loss: 0.0043 - acc: 1.0000 Epoch 27/100 45/45 [==============================] - 0s 100us/sample - loss: 0.0030 - acc: 1.0000 Epoch 28/100 45/45 [==============================] - 0s 91us/sample - loss: 0.0046 - acc: 1.0000 Epoch 29/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0059 - acc: 1.0000 Epoch 30/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0021 - acc: 1.0000 Epoch 31/100 45/45 [==============================] - 0s 81us/sample - loss: 0.0033 - acc: 1.0000 Epoch 32/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0033 - acc: 1.0000 Epoch 33/100 45/45 [==============================] - 0s 79us/sample - loss: 0.0012 - acc: 1.0000 Epoch 34/100 45/45 [==============================] - 0s 80us/sample - loss: 0.0028 - acc: 1.0000 Epoch 35/100 45/45 [==============================] - 0s 88us/sample - loss: 0.0010 - acc: 1.0000 Epoch 36/100 45/45 [==============================] - 0s 83us/sample - loss: 4.2334e-04 - acc: 1.0000 Epoch 37/100 45/45 [==============================] - 0s 89us/sample - loss: 4.2387e-04 - acc: 1.0000 Epoch 38/100 45/45 
[==============================] - 0s 94us/sample - loss: 8.5613e-04 - acc: 1.0000 Epoch 39/100 45/45 [==============================] - 0s 93us/sample - loss: 7.1938e-04 - acc: 1.0000 Epoch 40/100 45/45 [==============================] - 0s 113us/sample - loss: 0.0018 - acc: 1.0000 Epoch 41/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0045 - acc: 1.0000 Epoch 42/100 45/45 [==============================] - 0s 116us/sample - loss: 3.8056e-04 - acc: 1.0000 Epoch 43/100 45/45 [==============================] - 0s 104us/sample - loss: 0.0017 - acc: 1.0000 Epoch 44/100 45/45 [==============================] - 0s 110us/sample - loss: 4.3727e-04 - acc: 1.0000 Epoch 45/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0011 - acc: 1.0000 Epoch 46/100 45/45 [==============================] - 0s 109us/sample - loss: 4.2020e-04 - acc: 1.0000 Epoch 47/100 45/45 [==============================] - 0s 94us/sample - loss: 4.8539e-04 - acc: 1.0000 Epoch 48/100 45/45 [==============================] - 0s 95us/sample - loss: 6.3319e-04 - acc: 1.0000 Epoch 49/100 45/45 [==============================] - 0s 122us/sample - loss: 0.0023 - acc: 1.0000 Epoch 50/100 45/45 [==============================] - 0s 94us/sample - loss: 7.9401e-04 - acc: 1.0000 Epoch 51/100 45/45 [==============================] - 0s 79us/sample - loss: 7.5775e-04 - acc: 1.0000 Epoch 52/100 45/45 [==============================] - 0s 91us/sample - loss: 2.7256e-04 - acc: 1.0000 Epoch 53/100 45/45 [==============================] - 0s 120us/sample - loss: 2.2644e-04 - acc: 1.0000 Epoch 54/100 45/45 [==============================] - ETA: 0s - loss: 9.4350e-04 - acc: 1.000 - 0s 143us/sample - loss: 6.7357e-04 - acc: 1.0000 Epoch 55/100 45/45 [==============================] - 0s 112us/sample - loss: 1.1166e-04 - acc: 1.0000 Epoch 56/100 45/45 [==============================] - 0s 86us/sample - loss: 1.5603e-04 - acc: 1.0000 Epoch 57/100 45/45 
[==============================] - 0s 77us/sample - loss: 0.0015 - acc: 1.0000 Epoch 58/100 45/45 [==============================] - 0s 96us/sample - loss: 1.0572e-04 - acc: 1.0000 Epoch 59/100 45/45 [==============================] - 0s 86us/sample - loss: 9.4529e-05 - acc: 1.0000 Epoch 60/100 45/45 [==============================] - 0s 73us/sample - loss: 1.1195e-04 - acc: 1.0000 Epoch 61/100 45/45 [==============================] - 0s 67us/sample - loss: 1.0386e-04 - acc: 1.0000 Epoch 62/100 45/45 [==============================] - 0s 81us/sample - loss: 1.9429e-04 - acc: 1.0000 Epoch 63/100 45/45 [==============================] - 0s 85us/sample - loss: 2.2650e-04 - acc: 1.0000 Epoch 64/100 45/45 [==============================] - 0s 86us/sample - loss: 1.8409e-04 - acc: 1.0000 Epoch 65/100 45/45 [==============================] - 0s 79us/sample - loss: 2.0861e-04 - acc: 1.0000 Epoch 66/100 45/45 [==============================] - 0s 90us/sample - loss: 2.6926e-04 - acc: 1.0000 Epoch 67/100 45/45 [==============================] - 0s 75us/sample - loss: 4.4790e-04 - acc: 1.0000 Epoch 68/100 45/45 [==============================] - 0s 81us/sample - loss: 3.9756e-04 - acc: 1.0000 Epoch 69/100 45/45 [==============================] - 0s 74us/sample - loss: 8.7972e-04 - acc: 1.0000 Epoch 70/100 45/45 [==============================] - 0s 74us/sample - loss: 1.6232e-04 - acc: 1.0000 Epoch 71/100 45/45 [==============================] - 0s 83us/sample - loss: 3.0637e-04 - acc: 1.0000 Epoch 72/100 45/45 [==============================] - 0s 77us/sample - loss: 9.3986e-05 - acc: 1.0000 Epoch 73/100 45/45 [==============================] - 0s 66us/sample - loss: 4.0115e-04 - acc: 1.0000 Epoch 74/100 45/45 [==============================] - 0s 71us/sample - loss: 8.2845e-04 - acc: 1.0000 Epoch 75/100 45/45 [==============================] - 0s 74us/sample - loss: 7.9733e-04 - acc: 1.0000 Epoch 76/100 45/45 [==============================] - 0s 90us/sample - loss: 
9.4912e-05 - acc: 1.0000 Epoch 77/100 45/45 [==============================] - 0s 75us/sample - loss: 0.0027 - acc: 1.0000 Epoch 78/100 45/45 [==============================] - 0s 87us/sample - loss: 0.0020 - acc: 1.0000 Epoch 79/100 45/45 [==============================] - 0s 66us/sample - loss: 3.2385e-04 - acc: 1.0000 Epoch 80/100 45/45 [==============================] - 0s 79us/sample - loss: 7.4550e-04 - acc: 1.0000 Epoch 81/100 45/45 [==============================] - 0s 86us/sample - loss: 1.8345e-04 - acc: 1.0000 Epoch 82/100 45/45 [==============================] - 0s 67us/sample - loss: 4.4940e-05 - acc: 1.0000 Epoch 83/100 45/45 [==============================] - 0s 87us/sample - loss: 0.0013 - acc: 1.0000 Epoch 84/100 45/45 [==============================] - 0s 86us/sample - loss: 5.5045e-05 - acc: 1.0000 Epoch 85/100 45/45 [==============================] - 0s 75us/sample - loss: 1.2468e-04 - acc: 1.0000 Epoch 86/100 45/45 [==============================] - 0s 70us/sample - loss: 9.6673e-05 - acc: 1.0000 Epoch 87/100 45/45 [==============================] - 0s 69us/sample - loss: 1.4471e-04 - acc: 1.0000 Epoch 88/100 45/45 [==============================] - 0s 97us/sample - loss: 1.9797e-05 - acc: 1.0000 Epoch 89/100 45/45 [==============================] - 0s 72us/sample - loss: 4.7380e-05 - acc: 1.0000 Epoch 90/100 45/45 [==============================] - 0s 67us/sample - loss: 5.1895e-04 - acc: 1.0000 Epoch 91/100 45/45 [==============================] - 0s 77us/sample - loss: 2.5427e-04 - acc: 1.0000 Epoch 92/100 45/45 [==============================] - 0s 66us/sample - loss: 6.2754e-05 - acc: 1.0000 Epoch 93/100 45/45 [==============================] - 0s 84us/sample - loss: 3.0124e-04 - acc: 1.0000 Epoch 94/100 45/45 [==============================] - 0s 88us/sample - loss: 5.2923e-05 - acc: 1.0000 Epoch 95/100 45/45 [==============================] - 0s 87us/sample - loss: 1.0138e-04 - acc: 1.0000 Epoch 96/100 45/45 
[==============================] - 0s 90us/sample - loss: 0.0020 - acc: 1.0000 Epoch 97/100 45/45 [==============================] - 0s 85us/sample - loss: 6.9718e-04 - acc: 1.0000 Epoch 98/100 45/45 [==============================] - 0s 82us/sample - loss: 6.5152e-04 - acc: 1.0000 Epoch 99/100 45/45 [==============================] - 0s 85us/sample - loss: 4.4682e-05 - acc: 1.0000 Epoch 100/100 45/45 [==============================] - 0s 89us/sample - loss: 5.4066e-05 - acc: 1.0000
Out[5]:
<keras.callbacks.History at 0x7fc20b069ac0>
Dialog management¶
Preprocessing the user input¶
In [ ]:
def process_tool(sentence):
    """Tokenize, lemmatize and vectorize one user utterance.

    Mirrors the preprocessing applied to the training data so the model sees
    inputs in the same bag-of-words space.

    Args:
        sentence: raw user input string.

    Returns:
        A 2-D numpy array (1 x vocabulary size) of token counts.
    """
    tokens = nltk.word_tokenize(sentence.lower())
    lemmatized = [' '.join(lemmatizer.lemmatize(w) for w in tokens)]
    sentence_vect = vectorizer.transform(lemmatized)
    return sentence_vect.toarray()
    # Removed: stray unreachable lines after the return that referenced the
    # undefined names `labels` and `ind` (would raise NameError if executed).
def response(intent, data):
    """Print a chatbot reply for a classified intent.

    Args:
        intent: the classified intent; compared as a string for "bye" but
            indexed with [0] for tag matching below.
        data: the parsed intents JSON (dict with an "intents" list).

    Returns:
        None. (The original `return reply` raised NameError — `reply` was
        never defined — so returning None is the fix.)
    """
    if intent.lower() == "bye":
        print("See you next time.")
    else:
        for info in data["intents"]:
            # NOTE(review): `intent` is treated as a string above but indexed
            # here; intent[0] is its first character unless callers pass a
            # list/tuple — confirm the intended caller. This function is also
            # shadowed by the zero-argument response() defined later.
            if info['tag'] == intent[0]:
                # np.random.choice: the plain `random` import is commented
                # out at the top of the notebook, so random.choice would
                # raise NameError.
                print("Chatty:", np.random.choice(info['responses']))
                break
def response():
    """Interactive chat loop: read user messages, classify each one with the
    trained model, and print a reply from the matching intent until the user
    types "bye".

    Uses the module-level `model`, `label`, `file`, and process_tool().
    """
    while True:
        print("Me:", end="")
        msg = input()
        if msg.lower() == "bye":
            print("Chatty:", "See you next time.")
            break
        # NOTE(review): process_tool() already returns a 2-D array; wrapping
        # it in another list nests it one level deeper — confirm Keras
        # accepts this input shape.
        prediction = model.predict([process_tool(msg)])[0]
        ind = np.argmax(prediction)
        # NOTE(review): `ind` indexes the model's output units (positions in
        # the vectorizer vocabulary), but `label` is the per-example tag list
        # from the data cell — these index spaces do not line up. The demo
        # transcript always answering with the greeting response is consistent
        # with this mismatch. TODO: map predictions back to tags via a
        # dedicated label encoding.
        index = label[ind]
        for info in file["intents"]:
            if info['tag'] == index:
                print("Chatty:", np.random.choice(info['responses']))
# Kick off the interactive chat loop.
print("You can chat with the bot now!")
# Fixed the unbalanced quote in the prompt ("'bye" -> "'bye'").
print("(Enter 'bye' to stop)")
response()
You can chat with the bot now! (Enter 'bye to stop') Me:hi Chatty: Hello, thanks for asking Me:How you could help me? Chatty: Hello, thanks for asking Me:
In [ ]: