Chatbot: intent classification and dialog management
In [1]:
import json
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
#import random
import numpy as np
import pickle
import tflearn
import tensorflow.compat.v1 as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input, Dropout, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
WARNING:tensorflow:From /opt/anaconda3/lib/python3.9/site-packages/tensorflow/python/compat/v2_compat.py:107: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term
Language understanding¶
Load data¶
In [2]:
# Read the intents file and extract the training utterances and their tags.
# `with` guarantees the file handle is closed (the original leaked it), and
# json.load reads the stream directly instead of json.loads(f.read()).
with open("intents.json") as f:
    file = json.load(f)

text = []   # one training utterance (pattern) per entry
label = []  # intent tag for each utterance (parallel to `text`)
for item in file['intents']:
    for text_id in item['patterns']:
        text.append(text_id)
        label.append(item['tag'])

print(text)
print(label)
['Hi there', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Bye', 'See you later', 'Goodbye', 'Nice chatting to you, bye', 'Till next time', 'Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me', 'How you could help me?', 'What you can do?', 'What help you provide?', 'How you can be helpful?', 'What support is offered', 'How to check Adverse drug reaction?', 'Open adverse drugs module', 'Give me a list of drugs causing adverse behavior', 'List all drugs suitable for patient with adverse reaction', 'Which drugs dont have adverse reaction?', 'Open blood pressure module', 'Task related to blood pressure', 'Blood pressure data entry', 'I want to log blood pressure results', 'Blood pressure data management', 'I want to search for blood pressure result history', 'Blood pressure for patient', 'Load patient blood pressure result', 'Show blood pressure results for patient', 'Find blood pressure results by ID', 'Find me a pharmacy', 'Find pharmacy', 'List of pharmacies nearby', 'Locate pharmacy', 'Search pharmacy', 'Lookup for hospital', 'Searching for hospital to transfer patient', 'I want to search hospital data', 'Hospital lookup for patient', 'Looking up hospital details'] ['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'options', 'options', 'options', 'options', 'options', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'adverse_drug', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'blood_pressure_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'pharmacy_search', 'hospital_search', 'hospital_search', 'hospital_search', 'hospital_search', 'hospital_search']
Data Pre-processing¶
In [3]:
#dividing the data into training and testing data by a ratio of 80:20
# NOTE(review): dead code — the split below is quoted out, so the model cell
# trains on ALL the data and the evaluate call there is commented out too.
# Either delete this cell or re-enable the split (ideally with shuffling,
# since the data is grouped by tag).
'''size = int(len(text)*0.8)
split_text = [text[i:i + size] for i in range(0, len(text), size)]
split_label = [label[i:i + size] for i in range(0, len(text), size)]
train_text_data = split_text[0]
test_text_data = split_text[1]
train_label_data = split_label[0]
test_label_data = split_label[1]'''
Out[3]:
'size = int(len(text)*0.8)\nsplit_text = [text[i:i + size] for i in range(0, len(text), size)]\nsplit_label = [label[i:i + size] for i in range(0, len(text), size)]\n\ntrain_text_data = split_text[0]\ntest_text_data = split_text[1]\ntrain_label_data = split_label[0]\ntest_label_data = split_label[1]'
In [4]:
# Bag-of-words vectorization of the training utterances.
words = []           # every token seen, in order (kept for parity/debugging)
new_sentences = []   # lemmatized form of each utterance
lemmatizer = WordNetLemmatizer()
for utterance in text:
    tokens = nltk.word_tokenize(utterance.lower())
    words.extend(tokens)
    new_sentences.append(' '.join(lemmatizer.lemmatize(tok) for tok in tokens))

# Fit the vocabulary on the lemmatized utterances, then encode both the
# utterances and their tags as count vectors over that same vocabulary.
vectorizer = CountVectorizer()
vectorizer.fit(new_sentences)
train_text = vectorizer.transform(new_sentences).toarray()
train_label = vectorizer.transform(label).toarray()

print(len(train_text[0]))
82
Model¶
In [5]:
# Intent classifier: bag-of-words input -> two hidden ReLU layers -> softmax
# over the label-vocabulary positions produced by the vectorizer cell.
model = Sequential([
    Dense(128, input_shape=(len(train_text[0]),), activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(len(train_label[0]), activation='softmax'),
])

adam = tf.keras.optimizers.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Collapse each label row to a single integer class id, as required by the
# sparse categorical loss. Note this rebinds `train_label` in place.
train_label = np.argmax(train_label, axis=1)
model.fit(train_text, train_label, epochs=100, batch_size=32, verbose=1)
Train on 45 samples Epoch 1/100 45/45 [==============================] - 0s 1ms/sample - loss: 4.3852 - acc: 0.1556 Epoch 2/100 45/45 [==============================] - 0s 68us/sample - loss: 3.5261 - acc: 0.7778 Epoch 3/100 45/45 [==============================] - 0s 77us/sample - loss: 2.2355 - acc: 0.7778 Epoch 4/100 45/45 [==============================] - 0s 73us/sample - loss: 0.9877 - acc: 0.7778 Epoch 5/100 45/45 [==============================] - 0s 74us/sample - loss: 0.7807 - acc: 0.7778 Epoch 6/100 45/45 [==============================] - 0s 78us/sample - loss: 0.6781 - acc: 0.7778 Epoch 7/100 45/45 [==============================] - 0s 71us/sample - loss: 0.5063 - acc: 0.8000 Epoch 8/100 45/45 [==============================] - 0s 73us/sample - loss: 0.3235 - acc: 0.8000 Epoch 9/100 45/45 [==============================] - 0s 81us/sample - loss: 0.3077 - acc: 0.8667 Epoch 10/100 45/45 [==============================] - 0s 79us/sample - loss: 0.2736 - acc: 0.9556 Epoch 11/100 45/45 [==============================] - 0s 79us/sample - loss: 0.1775 - acc: 0.9778 Epoch 12/100 45/45 [==============================] - 0s 73us/sample - loss: 0.1675 - acc: 0.9556 Epoch 13/100 45/45 [==============================] - 0s 82us/sample - loss: 0.1568 - acc: 0.9111 Epoch 14/100 45/45 [==============================] - 0s 82us/sample - loss: 0.1002 - acc: 1.0000 Epoch 15/100 45/45 [==============================] - 0s 75us/sample - loss: 0.0962 - acc: 1.0000 Epoch 16/100 45/45 [==============================] - 0s 80us/sample - loss: 0.0548 - acc: 1.0000 Epoch 17/100 45/45 [==============================] - 0s 83us/sample - loss: 0.0403 - acc: 1.0000
2023-06-02 10:27:13.978107: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Epoch 18/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0394 - acc: 1.0000 Epoch 19/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0248 - acc: 1.0000 Epoch 20/100 45/45 [==============================] - 0s 88us/sample - loss: 0.0158 - acc: 1.0000 Epoch 21/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0062 - acc: 1.0000 Epoch 22/100 45/45 [==============================] - 0s 98us/sample - loss: 0.0093 - acc: 1.0000 Epoch 23/100 45/45 [==============================] - 0s 96us/sample - loss: 0.0072 - acc: 1.0000 Epoch 24/100 45/45 [==============================] - 0s 102us/sample - loss: 0.0053 - acc: 1.0000 Epoch 25/100 45/45 [==============================] - 0s 84us/sample - loss: 0.0014 - acc: 1.0000 Epoch 26/100 45/45 [==============================] - 0s 105us/sample - loss: 0.0043 - acc: 1.0000 Epoch 27/100 45/45 [==============================] - 0s 100us/sample - loss: 0.0030 - acc: 1.0000 Epoch 28/100 45/45 [==============================] - 0s 91us/sample - loss: 0.0046 - acc: 1.0000 Epoch 29/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0059 - acc: 1.0000 Epoch 30/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0021 - acc: 1.0000 Epoch 31/100 45/45 [==============================] - 0s 81us/sample - loss: 0.0033 - acc: 1.0000 Epoch 32/100 45/45 [==============================] - 0s 93us/sample - loss: 0.0033 - acc: 1.0000 Epoch 33/100 45/45 [==============================] - 0s 79us/sample - loss: 0.0012 - acc: 1.0000 Epoch 34/100 45/45 [==============================] - 0s 80us/sample - loss: 0.0028 - acc: 1.0000 Epoch 35/100 45/45 [==============================] - 0s 88us/sample - loss: 0.0010 - acc: 1.0000 Epoch 36/100 45/45 [==============================] - 0s 83us/sample - loss: 4.2334e-04 - acc: 1.0000 Epoch 37/100 45/45 [==============================] - 0s 89us/sample - loss: 4.2387e-04 - acc: 1.0000 Epoch 38/100 45/45 
[==============================] - 0s 94us/sample - loss: 8.5613e-04 - acc: 1.0000 Epoch 39/100 45/45 [==============================] - 0s 93us/sample - loss: 7.1938e-04 - acc: 1.0000 Epoch 40/100 45/45 [==============================] - 0s 113us/sample - loss: 0.0018 - acc: 1.0000 Epoch 41/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0045 - acc: 1.0000 Epoch 42/100 45/45 [==============================] - 0s 116us/sample - loss: 3.8056e-04 - acc: 1.0000 Epoch 43/100 45/45 [==============================] - 0s 104us/sample - loss: 0.0017 - acc: 1.0000 Epoch 44/100 45/45 [==============================] - 0s 110us/sample - loss: 4.3727e-04 - acc: 1.0000 Epoch 45/100 45/45 [==============================] - 0s 90us/sample - loss: 0.0011 - acc: 1.0000 Epoch 46/100 45/45 [==============================] - 0s 109us/sample - loss: 4.2020e-04 - acc: 1.0000 Epoch 47/100 45/45 [==============================] - 0s 94us/sample - loss: 4.8539e-04 - acc: 1.0000 Epoch 48/100 45/45 [==============================] - 0s 95us/sample - loss: 6.3319e-04 - acc: 1.0000 Epoch 49/100 45/45 [==============================] - 0s 122us/sample - loss: 0.0023 - acc: 1.0000 Epoch 50/100 45/45 [==============================] - 0s 94us/sample - loss: 7.9401e-04 - acc: 1.0000 Epoch 51/100 45/45 [==============================] - 0s 79us/sample - loss: 7.5775e-04 - acc: 1.0000 Epoch 52/100 45/45 [==============================] - 0s 91us/sample - loss: 2.7256e-04 - acc: 1.0000 Epoch 53/100 45/45 [==============================] - 0s 120us/sample - loss: 2.2644e-04 - acc: 1.0000 Epoch 54/100 45/45 [==============================] - ETA: 0s - loss: 9.4350e-04 - acc: 1.000 - 0s 143us/sample - loss: 6.7357e-04 - acc: 1.0000 Epoch 55/100 45/45 [==============================] - 0s 112us/sample - loss: 1.1166e-04 - acc: 1.0000 Epoch 56/100 45/45 [==============================] - 0s 86us/sample - loss: 1.5603e-04 - acc: 1.0000 Epoch 57/100 45/45 
[==============================] - 0s 77us/sample - loss: 0.0015 - acc: 1.0000 Epoch 58/100 45/45 [==============================] - 0s 96us/sample - loss: 1.0572e-04 - acc: 1.0000 Epoch 59/100 45/45 [==============================] - 0s 86us/sample - loss: 9.4529e-05 - acc: 1.0000 Epoch 60/100 45/45 [==============================] - 0s 73us/sample - loss: 1.1195e-04 - acc: 1.0000 Epoch 61/100 45/45 [==============================] - 0s 67us/sample - loss: 1.0386e-04 - acc: 1.0000 Epoch 62/100 45/45 [==============================] - 0s 81us/sample - loss: 1.9429e-04 - acc: 1.0000 Epoch 63/100 45/45 [==============================] - 0s 85us/sample - loss: 2.2650e-04 - acc: 1.0000 Epoch 64/100 45/45 [==============================] - 0s 86us/sample - loss: 1.8409e-04 - acc: 1.0000 Epoch 65/100 45/45 [==============================] - 0s 79us/sample - loss: 2.0861e-04 - acc: 1.0000 Epoch 66/100 45/45 [==============================] - 0s 90us/sample - loss: 2.6926e-04 - acc: 1.0000 Epoch 67/100 45/45 [==============================] - 0s 75us/sample - loss: 4.4790e-04 - acc: 1.0000 Epoch 68/100 45/45 [==============================] - 0s 81us/sample - loss: 3.9756e-04 - acc: 1.0000 Epoch 69/100 45/45 [==============================] - 0s 74us/sample - loss: 8.7972e-04 - acc: 1.0000 Epoch 70/100 45/45 [==============================] - 0s 74us/sample - loss: 1.6232e-04 - acc: 1.0000 Epoch 71/100 45/45 [==============================] - 0s 83us/sample - loss: 3.0637e-04 - acc: 1.0000 Epoch 72/100 45/45 [==============================] - 0s 77us/sample - loss: 9.3986e-05 - acc: 1.0000 Epoch 73/100 45/45 [==============================] - 0s 66us/sample - loss: 4.0115e-04 - acc: 1.0000 Epoch 74/100 45/45 [==============================] - 0s 71us/sample - loss: 8.2845e-04 - acc: 1.0000 Epoch 75/100 45/45 [==============================] - 0s 74us/sample - loss: 7.9733e-04 - acc: 1.0000 Epoch 76/100 45/45 [==============================] - 0s 90us/sample - loss: 
9.4912e-05 - acc: 1.0000 Epoch 77/100 45/45 [==============================] - 0s 75us/sample - loss: 0.0027 - acc: 1.0000 Epoch 78/100 45/45 [==============================] - 0s 87us/sample - loss: 0.0020 - acc: 1.0000 Epoch 79/100 45/45 [==============================] - 0s 66us/sample - loss: 3.2385e-04 - acc: 1.0000 Epoch 80/100 45/45 [==============================] - 0s 79us/sample - loss: 7.4550e-04 - acc: 1.0000 Epoch 81/100 45/45 [==============================] - 0s 86us/sample - loss: 1.8345e-04 - acc: 1.0000 Epoch 82/100 45/45 [==============================] - 0s 67us/sample - loss: 4.4940e-05 - acc: 1.0000 Epoch 83/100 45/45 [==============================] - 0s 87us/sample - loss: 0.0013 - acc: 1.0000 Epoch 84/100 45/45 [==============================] - 0s 86us/sample - loss: 5.5045e-05 - acc: 1.0000 Epoch 85/100 45/45 [==============================] - 0s 75us/sample - loss: 1.2468e-04 - acc: 1.0000 Epoch 86/100 45/45 [==============================] - 0s 70us/sample - loss: 9.6673e-05 - acc: 1.0000 Epoch 87/100 45/45 [==============================] - 0s 69us/sample - loss: 1.4471e-04 - acc: 1.0000 Epoch 88/100 45/45 [==============================] - 0s 97us/sample - loss: 1.9797e-05 - acc: 1.0000 Epoch 89/100 45/45 [==============================] - 0s 72us/sample - loss: 4.7380e-05 - acc: 1.0000 Epoch 90/100 45/45 [==============================] - 0s 67us/sample - loss: 5.1895e-04 - acc: 1.0000 Epoch 91/100 45/45 [==============================] - 0s 77us/sample - loss: 2.5427e-04 - acc: 1.0000 Epoch 92/100 45/45 [==============================] - 0s 66us/sample - loss: 6.2754e-05 - acc: 1.0000 Epoch 93/100 45/45 [==============================] - 0s 84us/sample - loss: 3.0124e-04 - acc: 1.0000 Epoch 94/100 45/45 [==============================] - 0s 88us/sample - loss: 5.2923e-05 - acc: 1.0000 Epoch 95/100 45/45 [==============================] - 0s 87us/sample - loss: 1.0138e-04 - acc: 1.0000 Epoch 96/100 45/45 
[==============================] - 0s 90us/sample - loss: 0.0020 - acc: 1.0000 Epoch 97/100 45/45 [==============================] - 0s 85us/sample - loss: 6.9718e-04 - acc: 1.0000 Epoch 98/100 45/45 [==============================] - 0s 82us/sample - loss: 6.5152e-04 - acc: 1.0000 Epoch 99/100 45/45 [==============================] - 0s 85us/sample - loss: 4.4682e-05 - acc: 1.0000 Epoch 100/100 45/45 [==============================] - 0s 89us/sample - loss: 5.4066e-05 - acc: 1.0000
Out[5]:
<keras.callbacks.History at 0x7fc20b069ac0>
Dialog management¶
Preprocessing the user input¶
In [ ]:
def process_tool(sentence):
    """Tokenize, lemmatize and vectorize one user utterance.

    Mirrors the preprocessing applied to the training data so the model sees
    inputs in the same bag-of-words space.

    Args:
        sentence: raw user input string.

    Returns:
        A 2-D numpy array (1 x vocabulary size) of token counts.
    """
    tokens = nltk.word_tokenize(sentence.lower())
    lemmatized = [' '.join(lemmatizer.lemmatize(w) for w in tokens)]
    sentence_vect = vectorizer.transform(lemmatized)
    return sentence_vect.toarray()
    # Removed: stray unreachable lines after the return that referenced the
    # undefined names `labels` and `ind` (would raise NameError if executed).
def response(intent, data):
    """Print a chatbot reply for a classified intent.

    Args:
        intent: the classified intent; compared as a string for "bye" but
            indexed with [0] for tag matching below.
        data: the parsed intents JSON (dict with an "intents" list).

    Returns:
        None. (The original `return reply` raised NameError — `reply` was
        never defined — so returning None is the fix.)
    """
    if intent.lower() == "bye":
        print("See you next time.")
    else:
        for info in data["intents"]:
            # NOTE(review): `intent` is treated as a string above but indexed
            # here; intent[0] is its first character unless callers pass a
            # list/tuple — confirm the intended caller. This function is also
            # shadowed by the zero-argument response() defined later.
            if info['tag'] == intent[0]:
                # np.random.choice: the plain `random` import is commented
                # out at the top of the notebook, so random.choice would
                # raise NameError.
                print("Chatty:", np.random.choice(info['responses']))
                break
def response():
    """Interactive chat loop: read user messages, classify each one with the
    trained model, and print a reply from the matching intent until the user
    types "bye".

    Uses the module-level `model`, `label`, `file`, and process_tool().
    """
    while True:
        print("Me:", end="")
        msg = input()
        if msg.lower() == "bye":
            print("Chatty:", "See you next time.")
            break
        # NOTE(review): process_tool() already returns a 2-D array; wrapping
        # it in another list nests it one level deeper — confirm Keras
        # accepts this input shape.
        prediction = model.predict([process_tool(msg)])[0]
        ind = np.argmax(prediction)
        # NOTE(review): `ind` indexes the model's output units (positions in
        # the vectorizer vocabulary), but `label` is the per-example tag list
        # from the data cell — these index spaces do not line up. The demo
        # transcript always answering with the greeting response is consistent
        # with this mismatch. TODO: map predictions back to tags via a
        # dedicated label encoding.
        index = label[ind]
        for info in file["intents"]:
            if info['tag'] == index:
                print("Chatty:", np.random.choice(info['responses']))
# Kick off the interactive chat loop.
print("You can chat with the bot now!")
# Fixed the unbalanced quote in the prompt ("'bye" -> "'bye'").
print("(Enter 'bye' to stop)")
response()
You can chat with the bot now! (Enter 'bye to stop') Me:hi Chatty: Hello, thanks for asking Me:How you could help me? Chatty: Hello, thanks for asking Me:
In [ ]: