AI for Healthcare (Part 2)
Part 2: Building a Deep Learning Model Using TensorFlow for 2D Imaging
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
%matplotlib inline
import matplotlib.pyplot as plt
## General libraries
import seaborn as sns
from itertools import chain
import pydicom
from random import sample
## Scikit-Learn
from skimage.io import imread, imshow
from skimage import io
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, auc, roc_auc_score, roc_curve, accuracy_score, precision_recall_curve, average_precision_score
## Keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential, Model, model_from_json
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16
## Load the NIH data to all_xray_df
# Read the metadata table; each row is looked up by its 'Image Index' below.
all_xray_df = pd.read_csv('/data/Data_Entry_2017.csv')
# Map every PNG base name found under /data/images*/*/ to its full path.
all_image_paths = {os.path.basename(x): x for x in
glob(os.path.join('/data','images*', '*', '*.png'))}
print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
# Attach the on-disk path to each row; dict.get yields None for any
# 'Image Index' that has no matching file in the mapping.
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
# Notebook display cells: a random sample of rows and the column names.
all_xray_df.sample(3)
all_xray_df.columns
Data Preprocessing - Reference EDA notebook
- Create a column for every disease.
- Remove the `Unnamed: 11` column.
- Remove `Patient Age` outliers above 100. These are mostly ages 150-413, which make no sense and are obviously data-entry errors.
- Remove images labelled `Mass` and `Infiltration`, based on the analysis in the EDA notebook showing that their pixel-density distributions are similar to those of Pneumonia cases.
## Create one binary indicator column per finding instead of working
## directly with the pipe-separated 'Finding Labels' column.
# Collect the distinct labels. Sorting makes the order of the new columns
# reproducible; iterating a plain set of strings is not stable across runs.
all_labels = sorted({
    label
    for finding in all_xray_df['Finding Labels']
    for label in finding.split('|')
})
for c_label in all_labels:
    # Compare against the split labels (exact match) rather than testing for
    # a substring, so one label can never be triggered by another label that
    # merely contains it. The label is bound as a default argument so the
    # lambda does not depend on the loop variable.
    all_xray_df[c_label] = all_xray_df['Finding Labels'].map(
        lambda finding, label=c_label: 1.0 if label in finding.split('|') else 0.0)
all_xray_df.sample(3)

# Remove the 'Unnamed: 11' column.
all_xray_df.drop(columns=['Unnamed: 11'], inplace=True)
# Remove rows whose 'Patient Age' is above 100.
idx = all_xray_df[all_xray_df['Patient Age'] > 100].index.tolist()
all_xray_df.drop(index=idx, inplace=True)
# Check number of Pneumonia cases
sum(all_xray_df['Pneumonia'] == 1)
all_labels
all_xray_df.shape

# First round will use this
# .copy() gives an independent frame, so dropping columns below neither
# touches all_xray_df nor triggers pandas' SettingWithCopyWarning.
xray_df = all_xray_df[(all_xray_df['Mass'] == 0) & (all_xray_df['Infiltration'] == 0)].copy()
xray_df = xray_df.drop(columns=['Mass', 'Infiltration'])
xray_df.shape
sum(xray_df['Pneumonia'] == 1)
sum(xray_df['Pneumonia'] == 0)

# Second round will use this instead
# NOTE: this rebinds xray_df, so everything below runs on the full table.
xray_df = all_xray_df.copy()
xray_df.shape
sum(xray_df['Pneumonia'] == 1)
sum(xray_df['Pneumonia'] == 0)
# Horizontal bar chart of the 15 most frequent findings in all_xray_df;
# the Pneumonia bar is drawn in red, all others in blue.
plt.figure(figsize=(10,6))
vals = all_xray_df[all_labels].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Total data distribution between findings')
plt.show()
# Same chart for xray_df.
# assumes the indicator columns start at position 12 of xray_df — TODO confirm
# NOTE(review): the title says Mass and Infiltration were removed, but the
# preceding cell last assigns xray_df = all_xray_df.copy() — confirm the title.
plt.figure(figsize=(10,6))
vals = xray_df[xray_df.columns[12:].tolist()].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Data istribution between findings (Mass and Infiltration removed)')
plt.show()
# Relative share of each 'Patient Gender' value.
xray_df['Patient Gender'].value_counts(normalize=True)
# Class counts and the fraction of rows labelled Pneumonia.
print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")
# The same summary is printed a second time.
print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")
# Keep only the columns that are carried into the train/validation split.
cols = ['path', 'Pneumonia', 'Patient Gender', 'Patient Age', 'View Position']
dt = xray_df[cols]
def create_splits(data, test_size=0.2, strat=None):
    """Split ``data`` into a training and a validation DataFrame.

    Args:
        data: DataFrame to split.
        test_size: Share of rows placed in the validation set; forwarded to
            ``train_test_split``.
        strat: Name of the column to stratify on, or ``None`` for a plain
            random split.

    Returns:
        Tuple ``(train_data, val_data)``.
    """
    # Only index the frame when a column was named: data[None] would raise
    # a KeyError, which made the declared default unusable.
    stratify = data[strat] if strat is not None else None
    train_data, val_data = train_test_split(data,
                                            test_size=test_size,
                                            stratify=stratify,
                                            random_state=42)
    return train_data, val_data
# Pass our parameters to the function
# 80/20 split, stratified on the 'Pneumonia' column.
train_set, val_set = create_splits(dt, 0.2, 'Pneumonia')
train_size = len(train_set)
val_size = len(val_set)
# Print the positive and negative fractions of each split.
print(f"Training: {sum(train_set['Pneumonia'] == 1)/train_size}, {sum(train_set['Pneumonia'] == 0)/train_size}")
print(f"Validation:, {sum(val_set['Pneumonia'] == 1)/val_size}, {sum(val_set['Pneumonia'] == 0)/val_size} ")
Research Papers and Articles regarding Imbalanced Classes in Deep Learning
- Survey on deep learning with class imbalance - Springer
- Solving Class Imbalance problem in CNN - Medium
- Deep Over-sampling Framework for Classifying Imbalanced Data - Paper
- SMOTE for Imbalanced Classification with Python
Note: From the research, it seems that a combination of undersampling and oversampling may produce the best results. This means undersampling the majority class (in our case, Non-Pneumonia) and oversampling the minority class. I may need to experiment further with these techniques in the future. For now, I will just reduce the data from the majority class to bring the two classes into balance.
Handling Imbalanced Classes
Even though there is a slight imbalance in Patient Gender as well, I think it can be neglected for now. The current technique for balancing the Pneumonia and Non-Pneumonia classes already discards a lot of data, and I would like to preserve as much data as possible for training.
# Index labels of the positive and negative training rows.
p_idx = train_set[train_set['Pneumonia']==1].index.tolist()
np_idx = train_set[train_set['Pneumonia']==0].index.tolist()
# Undersample the negatives: draw as many of them (without replacement) as
# there are positives. No seed is set, so the draw differs between runs.
np_sample = sample(np_idx,len(p_idx))
# Balanced training frame: all positives plus the sampled negatives.
train_df = train_set.loc[p_idx + np_sample]
train_df.shape
Let's check the data distribution between Positive and Negative
# Class fractions of the balanced training frame.
train_size = len(train_df)
print(f"Training: {sum(train_df['Pneumonia'] == 1)/train_size}, {sum(train_df['Pneumonia'] == 0)/train_size}")
# Build the validation frame with three sampled negatives per positive.
p_idx = val_set[val_set['Pneumonia']==1].index.tolist()
np_idx = val_set[val_set['Pneumonia']==0].index.tolist()
np_sample = sample(np_idx,len(p_idx)*3)
val_df = val_set.loc[p_idx + np_sample]
val_df
val_df.shape
val_size = len(val_df)
# These fractions describe val_df, so label them as validation (the original
# message said "Training:", which mislabelled the output).
print(f"Validation: {sum(val_df['Pneumonia'] == 1)/val_size}, {sum(val_df['Pneumonia'] == 0)/val_size}")
from collections import Counter
# Absolute class counts of both frames.
Counter(train_df.Pneumonia)
Counter(val_df.Pneumonia)
# Gender share among the positive cases of each frame.
train_df[train_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)
val_df[val_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)
Library `imblearn`: to investigate and revisit in the future.
# !pip install -U imbalanced-learn
# import imblearn
# from imblearn.over_sampling import SMOTE
# print(imblearn.__version__)
## This is the image size that VGG16 takes as input
## It is passed as `target_size` to the data generators created below.
IMG_SIZE = (224, 224)
def my_image_augmentation(horizontal_flip=False,
                          vertical_flip=False,
                          height_shift_range=0,
                          width_shift_range=0,
                          rotation_range=0,
                          shear_range=0,
                          zoom_range=0):
    """Create a Keras ``ImageDataGenerator`` with the requested augmentations.

    Every argument is forwarded unchanged to ``ImageDataGenerator`` under the
    same keyword. With the defaults no augmentation is requested and the
    generator only rescales, which is how the validation generator is built.

    Returns:
        The configured ``ImageDataGenerator``.
    """
    augmentation_options = {
        'horizontal_flip': horizontal_flip,
        'vertical_flip': vertical_flip,
        'height_shift_range': height_shift_range,
        'width_shift_range': width_shift_range,
        'rotation_range': rotation_range,
        'shear_range': shear_range,
        'zoom_range': zoom_range,
    }
    # Pixel values are always multiplied by 1/255, augmented or not.
    return ImageDataGenerator(rescale=1. / 255.0, **augmentation_options)
def _flow_binary_from_dataframe(idg, df, x_col, y_col, target_size, batch_size):
    """Shared implementation of the train/validation generator factories."""
    return idg.flow_from_dataframe(dataframe=df,
                                   directory=None,
                                   x_col=x_col,
                                   y_col=y_col,
                                   class_mode='binary',
                                   target_size=target_size,
                                   batch_size=batch_size)


def make_train_gen(train_idg, train_df, x_col, y_col, target_size, batch_size):
    """Create the training batch generator.

    Args:
        train_idg: Object exposing ``flow_from_dataframe`` (the output of
            ``my_image_augmentation``).
        train_df: DataFrame describing the training images.
        x_col: Name of the column forwarded as ``x_col``.
        y_col: Name of the column forwarded as ``y_col``.
        target_size: Forwarded as ``target_size``.
        batch_size: Forwarded as ``batch_size``.

    Returns:
        Whatever ``train_idg.flow_from_dataframe`` returns, configured with
        ``class_mode='binary'`` and ``directory=None``.
    """
    return _flow_binary_from_dataframe(train_idg, train_df, x_col, y_col,
                                       target_size, batch_size)


def make_val_gen(val_idg, val_df, x_col, y_col, target_size, batch_size):
    """Create the validation batch generator.

    Takes the same arguments as ``make_train_gen`` and forwards them the same
    way; only the generator object and DataFrame passed in differ.
    """
    return _flow_binary_from_dataframe(val_idg, val_df, x_col, y_col,
                                       target_size, batch_size)
Note: The labels are cast to strings below because an error is raised unless the classes are strings when `class_mode` is set to `binary`.
# The label column is cast to strings before it is handed to the generators.
train_df['Pneumonia'] = train_df['Pneumonia'].astype('str')
val_df['Pneumonia'] = val_df['Pneumonia'].astype('str')
# Training data is augmented (horizontal flip, small shifts, rotation, shear
# and zoom); validation data is only rescaled.
train_idg = my_image_augmentation(True, False, 0.1, 0.1, 20, 0.1, 0.1)
val_idg = my_image_augmentation()
train_gen = make_train_gen(train_idg=train_idg,
                           train_df=train_df,
                           x_col='path',
                           y_col='Pneumonia',
                           target_size=IMG_SIZE,
                           batch_size=22)
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 22)
## May want to pull a single large batch of random validation data for testing after each epoch:
# Use the builtin next(), as is done for train_gen below, instead of the
# generator's own .next() method.
valX, valY = next(val_gen)
## May want to look at some examples of our augmented training data.
## This is helpful for understanding the extent to which data is being manipulated prior to training,
## and can be compared with how the raw data look prior to augmentation
t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize=(16, 16))
# zip stops at the shortest input, so at most 16 images of the batch are shown.
for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
    c_ax.imshow(c_x[:, :, 0], cmap='bone')
    if c_y == 1:
        c_ax.set_title('Pneumonia')
    else:
        c_ax.set_title('No Pneumonia')
    c_ax.axis('off')
# Inspect the full pretrained VGG16 architecture.
model = VGG16(include_top=True, weights='imagenet')
model.summary()
def load_pretrained_model():
    """Return VGG16 (ImageNet weights) truncated at the 'block5_pool' layer.

    All layers of the truncated model except the last two are marked as
    non-trainable.
    """
    full_vgg = VGG16(include_top=True, weights='imagenet')
    cut_output = full_vgg.get_layer('block5_pool').output
    vgg_model = Model(inputs=full_vgg.input, outputs=cut_output)
    # Freeze everything but the final two layers of the truncated network.
    frozen_layers = vgg_model.layers[:-2]
    for frozen in frozen_layers:
        frozen.trainable = False
    return vgg_model
# Build the truncated, mostly frozen VGG16 base and print its layer summary.
vgg_model = load_pretrained_model()
vgg_model.summary()
def build_my_model(pre_trained):
    """Stack a dense classification head on top of ``pre_trained``.

    Args:
        pre_trained: Model used as the first layer of the new network (the
            convolutional base returned by ``load_pretrained_model``).

    Returns:
        An uncompiled ``Sequential`` model ending in a single sigmoid unit.
    """
    my_model = Sequential()
    # Use the model that was passed in. The original body added the global
    # ``vgg_model`` here and silently ignored its own parameter.
    my_model.add(pre_trained)
    # Flatten the base's output because it comes from a convolutional layer.
    my_model.add(Flatten())
    # Three fully-connected layers, each followed by 50% dropout, combine the
    # features recognised by the base.
    my_model.add(Dense(1024, activation='relu'))
    my_model.add(Dropout(0.5))
    my_model.add(Dense(512, activation='relu'))
    my_model.add(Dropout(0.5))
    my_model.add(Dense(256, activation='relu'))
    my_model.add(Dropout(0.5))
    # Final output layer: one sigmoid unit for the binary decision.
    my_model.add(Dense(1, activation='sigmoid'))
    return my_model
model = build_my_model(vgg_model)
optimizer = Adam(lr=0.001)
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.summary()
## Below is some helper code that will allow you to add checkpoints to your model,
## This will save the 'best' version of your model by comparing it to previous epochs of training
## Note that you need to choose which metric to monitor for your model's 'best' performance if using this code.
## The 'patience' parameter is set to 10, meaning that your model will train for ten epochs without seeing
## improvement before quitting
# Todo
weight_path="{}_my_model2.best.hdf5".format('xray_class')
checkpoint = ModelCheckpoint(weight_path,
monitor= 'val_loss',
verbose=1,
save_best_only=True,
mode= 'min',
save_weights_only = True)
early = EarlyStopping(monitor= 'val_loss',
mode= 'min',
patience=10)
callbacks_list = [checkpoint, early]
history = model.fit_generator(train_gen, validation_data=[valX, valY], epochs=30, callbacks=callbacks_list )
history.history.keys()
!ls
## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists,
## so there is no need to make the model perfect.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
Create a binary output, instead of just a probability, using a standard 0.5 threshold for now.
# Each element of pred_Y is indexed with [0] to obtain its score; scores
# strictly above 0.5 become class 1, everything else class 0.
pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
def plot_auc(t_y, p_y):
    """Draw the ROC curve of ``p_y`` against ``t_y`` on a new 9x9 figure.

    t_y: true labels, passed to ``roc_curve`` as the first argument
    p_y: predicted scores, passed to ``roc_curve`` as the second argument

    The legend entry shows the area under the curve with two decimals.
    """
    _, roc_axis = plt.subplots(1, 1, figsize=(9, 9))
    false_pos_rate, true_pos_rate, _ = roc_curve(t_y, p_y)
    area = auc(false_pos_rate, true_pos_rate)
    legend_text = '%s (AUC:%0.2f)' % ('Pneumonia', area)
    roc_axis.plot(false_pos_rate, true_pos_rate, label=legend_text)
    roc_axis.legend()
    roc_axis.set_xlabel('False Positive Rate')
    roc_axis.set_ylabel('True Positive Rate')
    return
## what other performance statistics do you want to include here besides AUC?
# def ...
# Todo
def plot_pr(t_y, p_y):
    """Draw the precision-recall curve of ``p_y`` against ``t_y``.

    t_y: true labels
    p_y: predicted scores

    The legend entry shows the average-precision score with two decimals.
    """
    fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
    precision, recall, thresholds = precision_recall_curve(t_y, p_y)
    # Recall goes on the x-axis and precision on the y-axis so the curve
    # agrees with the axis labels below (the original plotted them swapped).
    c_ax.plot(recall, precision, label='%s (AP Score:%0.2f)' % ('Pneumonia', average_precision_score(t_y, p_y)))
    c_ax.legend()
    c_ax.set_xlabel('Recall')
    c_ax.set_ylabel('Precision')
#Also consider plotting the history of your model training:
def plot_history(history, kind):
    '''
    Plot the validation and training curves of one metric, one point per epoch.

    history: object whose ``history`` attribute maps metric names to
             per-epoch value lists (as returned by ``fit_generator``)
    kind: either "accuracy" or "loss"
    '''
    plt.figure(figsize=(9, 9))
    # The model was compiled with 'binary_accuracy', so map the short name.
    kind = 'binary_accuracy' if kind == 'accuracy' else kind
    plt.plot(history.history[f'val_{kind}'], label=f'validation {kind}')
    plt.plot(history.history[f'{kind}'], label=f'training {kind}')
    plt.title(f'Validation/Training {kind}')
    # Epochs run along the x-axis (the list index); the original put this
    # label on the y-axis.
    plt.xlabel('EPOCHS')
    plt.ylabel(kind)
    plt.legend()
    plt.show()
    return
# ROC curve of the predictions on the held-out validation batch.
plot_auc(valY, pred_Y)
## plot figures
# Training curves for accuracy and loss.
plot_history(history, 'accuracy')
# Todo
plot_history(history, 'loss')
Once you feel you are done training, you'll need to decide the proper classification threshold that optimizes your model's performance for a given metric (e.g. accuracy, F1, precision, etc.). You decide.
# scikit-learn metrics take (y_true, y_pred). With the arguments swapped the
# matrix is transposed, fp and fn trade places, and the "sensitivity" below
# is really precision. Ground truth is therefore passed first.
confusion_matrix(valY, pred_Y_binary)
tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
accuracy_score(valY, pred_Y_binary)
f1_score(valY, pred_Y_binary)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
precision, recall, thresholds = precision_recall_curve(valY, pred_Y)
## Find the threshold that optimize your model's performance,
## and use that threshold to make binary classification. Make sure you take all your metrics into consideration.
# Pull a fresh validation batch of up to 100 images and predict on it.
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = next(val_gen)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
## Let's look at some examples of true vs. predicted with our best model:
fig, m_axs = plt.subplots(10, 10, figsize=(16, 16))
# Iterate over predictions in lockstep with the images instead of keeping a
# separate index counter. Titles read "<true>, <predicted>"; mistakes are red.
for (c_x, c_y, c_pred, c_ax) in zip(valX[0:100], valY[0:100], pred_Y, m_axs.flatten()):
    c_ax.imshow(c_x[:, :, 0], cmap='bone')
    if c_y == 1:
        if c_pred[0] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if c_pred[0] > 0.5:
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
# Ground truth first: scikit-learn's signature is (y_true, y_pred).
confusion_matrix(valY, pred_Y_binary)
# Recompute the four counts for THIS batch. The original reused tn/fp/fn/tp
# from an earlier batch, so sens and spec did not describe these predictions.
tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
acc = accuracy_score(valY, pred_Y_binary)
f1 = f1_score(valY, pred_Y_binary)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
plot_auc(valY, pred_Y)
plot_pr(valY, pred_Y)
## Just save model architecture to a .json:
model_json = model.to_json()
with open("my_model.json", "w") as json_file:
json_file.write(model_json)
!ls
json_file = open('my_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("xray_class_my_model.best.hdf5")
print("Loaded model from disk")
loaded_model.summary()
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
# Grid of up to 100 validation images titled "<true>, <predicted>";
# misclassified images get a red title.
fig, m_axs = plt.subplots(10, 10, figsize=(16, 16))
# Predictions are zipped with the images, replacing the manual counter.
for (c_x, c_y, c_pred, c_ax) in zip(valX[0:100], valY[0:100], pred_Y, m_axs.flatten()):
    c_ax.imshow(c_x[:, :, 0], cmap='bone')
    if c_y == 1:
        if c_pred[0] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if c_pred[0] > 0.5:
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
# Test for different threshold levels
# using colors just to make it easier to read and spot numbers
for t in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred): with the arguments swapped, fp and
    # fn trade places and the printed sensitivity/specificity are wrong.
    cm = confusion_matrix(valY, pred_Y_binary)
    print(f'when threshold at {t} CF:')
    print(cm)
    tn, fp, fn, tp = cm.ravel()
    print(f'\x1b[34maccuracy \x1b[0m= \x1b[31m{accuracy_score(valY, pred_Y_binary)}\x1b[0m')
    print(f'F1 Score = {f1_score(valY, pred_Y_binary)}')
    print(f'\x1b[34mSensitivity Score \x1b[0m= \x1b[31m{tp/(tp+fn)}\x1b[0m')
    print(f'\x1b[34mSpecificity Score \x1b[0m= \x1b[31m{tn/(tn+fp)}\x1b[0m')
    print('-------------------------------')
# Sensitivity and specificity as a function of the decision threshold.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sens = []
spec = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred). With the arguments swapped the
    # denominators counted predicted rather than actual positives/negatives,
    # and became zero at threshold 1.0, where nothing is predicted positive.
    tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
    sens.append(tp/(tp+fn))
    spec.append(tn/(tn+fp))
plt.figure(figsize=(10,6))
plt.plot(thresholds, sens, label='Sensitivity')
plt.plot(thresholds, spec, label='Specificity')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
# F1 score of the thresholded predictions for each candidate threshold.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
fscore = []
for t in thresholds:
    # Scores strictly above the threshold count as the positive class.
    pred_Y_binary = [(1 if score[0] > t else 0) for score in pred_Y]
    score_at_t = f1_score(valY, pred_Y_binary)
    fscore.append(score_at_t)
plt.figure(figsize=(10, 6))
plt.plot(thresholds, fscore, label='f1 score')
plt.title('F1 Score vs Threshold')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
# Precision and recall as a function of the decision threshold.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
prec = []
recall = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred): with the arguments swapped, the
    # two plotted curves carried each other's label.
    tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
    # Precision is undefined when nothing is predicted positive (e.g. at
    # threshold 1.0); record NaN so the plot shows a gap instead of dividing
    # by zero.
    prec.append(tp/(tp+fp) if (tp+fp) else float('nan'))
    recall.append(tp/(tp+fn))
plt.figure(figsize=(10,6))
plt.plot(thresholds, prec, label='Precision')
plt.plot(thresholds, recall, label='Recall')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('Precision and Recall vs Threshold ')
plt.legend()
plt.show()
# Train again with the same callbacks; validation_data is passed as the
# documented (x_val, y_val) tuple rather than a list.
history = model.fit_generator(train_gen, validation_data=(valX, valY), epochs=30, callbacks=callbacks_list)
## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists,
## so there is no need to make the model perfect.
# Restore the best checkpoint before predicting on the held-out batch.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
Create a binary output, instead of just a probability, using a standard 0.5 threshold for now.
# Each element of pred_Y is indexed with [0] to obtain its score; scores
# strictly above 0.5 become class 1, everything else class 0.
pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
def plot_auc(t_y, p_y):
    """Draw the ROC curve of ``p_y`` against ``t_y`` on a new 9x9 figure.

    t_y: true labels, passed to ``roc_curve`` as the first argument
    p_y: predicted scores, passed to ``roc_curve`` as the second argument

    The legend entry shows the area under the curve with two decimals.
    """
    _, roc_axis = plt.subplots(1, 1, figsize=(9, 9))
    false_pos_rate, true_pos_rate, _ = roc_curve(t_y, p_y)
    area = auc(false_pos_rate, true_pos_rate)
    legend_text = '%s (AUC:%0.2f)' % ('Pneumonia', area)
    roc_axis.plot(false_pos_rate, true_pos_rate, label=legend_text)
    roc_axis.legend()
    roc_axis.set_xlabel('False Positive Rate')
    roc_axis.set_ylabel('True Positive Rate')
    return
## what other performance statistics do you want to include here besides AUC?
# def ...
# Todo
def plot_pr(t_y, p_y):
    """Draw the precision-recall curve of ``p_y`` against ``t_y``.

    t_y: true labels
    p_y: predicted scores

    The legend entry shows the average-precision score with two decimals.
    """
    fig, c_ax = plt.subplots(1, 1, figsize=(9, 9))
    precision, recall, thresholds = precision_recall_curve(t_y, p_y)
    # Recall goes on the x-axis and precision on the y-axis so the curve
    # agrees with the axis labels below (the original plotted them swapped).
    c_ax.plot(recall, precision, label='%s (AP Score:%0.2f)' % ('Pneumonia', average_precision_score(t_y, p_y)))
    c_ax.legend()
    c_ax.set_xlabel('Recall')
    c_ax.set_ylabel('Precision')
#Also consider plotting the history of your model training:
def plot_history(history, kind):
    '''
    Plot the validation and training curves of one metric, one point per epoch.

    history: object whose ``history`` attribute maps metric names to
             per-epoch value lists (as returned by ``fit_generator``)
    kind: either "accuracy" or "loss"
    '''
    plt.figure(figsize=(9, 9))
    # The model was compiled with 'binary_accuracy', so map the short name.
    kind = 'binary_accuracy' if kind == 'accuracy' else kind
    plt.plot(history.history[f'val_{kind}'], label=f'validation {kind}')
    plt.plot(history.history[f'{kind}'], label=f'training {kind}')
    plt.title(f'Validation/Training {kind}')
    # Epochs run along the x-axis (the list index); the original put this
    # label on the y-axis.
    plt.xlabel('EPOCHS')
    plt.ylabel(kind)
    plt.legend()
    plt.show()
    return
# ROC curve of the predictions on the held-out validation batch.
plot_auc(valY, pred_Y)
## plot figures
# Training curves for accuracy and loss.
plot_history(history, 'accuracy')
# Todo
plot_history(history, 'loss')
Once you feel you are done training, you'll need to decide the proper classification threshold that optimizes your model's performance for a given metric (e.g. accuracy, F1, precision, etc.). You decide.
# scikit-learn metrics take (y_true, y_pred); ground truth is passed first so
# that fp and fn are not exchanged.
confusion_matrix(valY, pred_Y_binary)
tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
precision, recall, thresholds = precision_recall_curve(valY, pred_Y)
# Pull a fresh validation batch of up to 100 images and predict on it.
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = next(val_gen)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
confusion_matrix(valY, pred_Y_binary)
# Recompute the four counts for the NEW batch. The original reused the
# values unpacked above, so sens and spec described the previous batch.
tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
acc = accuracy_score(valY, pred_Y_binary)
f1 = f1_score(valY, pred_Y_binary)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
plot_auc(valY, pred_Y)
plot_pr(valY, pred_Y)
## Just save model architecture to a .json:
model_json = model.to_json()
with open("my_model2.json", "w") as json_file:
    json_file.write(model_json)
# Read the architecture back with a context manager so the file handle is
# closed even if reading fails.
with open('my_model2.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("xray_class_my_model2.best.hdf5")
print("Loaded model from disk")
# Predict on a fresh validation batch of up to 100 images.
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = next(val_gen)
pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
# Test for different threshold levels
# using colors just to make it easier to read and spot numbers
for t in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred): with the arguments swapped, fp and
    # fn trade places and the printed sensitivity/specificity are wrong.
    cm = confusion_matrix(valY, pred_Y_binary)
    print(f'when threshold at {t} CF:')
    print(cm)
    tn, fp, fn, tp = cm.ravel()
    print(f'\x1b[34maccuracy \x1b[0m= \x1b[31m{accuracy_score(valY, pred_Y_binary)}\x1b[0m')
    print(f'F1 Score = {f1_score(valY, pred_Y_binary)}')
    print(f'\x1b[34mSensitivity Score \x1b[0m= \x1b[31m{tp/(tp+fn)}\x1b[0m')
    print(f'\x1b[34mSpecificity Score \x1b[0m= \x1b[31m{tn/(tn+fp)}\x1b[0m')
    print('-------------------------------')
# Sensitivity and specificity as a function of the decision threshold.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sens = []
spec = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred). With the arguments swapped the
    # denominators counted predicted rather than actual positives/negatives,
    # and became zero at threshold 1.0, where nothing is predicted positive.
    tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
    sens.append(tp/(tp+fn))
    spec.append(tn/(tn+fp))
plt.figure(figsize=(10,6))
plt.plot(thresholds, sens, label='Sensitivity')
plt.plot(thresholds, spec, label='Specificity')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
# F1 score of the thresholded predictions for each candidate threshold
# (reuses the `thresholds` list defined by the preceding cell).
fscore = []
for t in thresholds:
    # Scores strictly above the threshold count as the positive class.
    pred_Y_binary = [(1 if score[0] > t else 0) for score in pred_Y]
    score_at_t = f1_score(valY, pred_Y_binary)
    fscore.append(score_at_t)
plt.figure(figsize=(10, 6))
plt.plot(thresholds, fscore, label='f1 score')
plt.title('F1 Score vs Threshold')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
# Precision and recall as a function of the decision threshold.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
prec = []
recall = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    # Ground truth first (y_true, y_pred): with the arguments swapped, the
    # two plotted curves carried each other's label.
    tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
    # Precision is undefined when nothing is predicted positive (e.g. at
    # threshold 1.0); record NaN so the plot shows a gap instead of dividing
    # by zero.
    prec.append(tp/(tp+fp) if (tp+fp) else float('nan'))
    recall.append(tp/(tp+fn))
plt.figure(figsize=(10,6))
plt.plot(thresholds, prec, label='Precision')
plt.plot(thresholds, recall, label='Recall')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('Precision and Recall vs Threshold ')
plt.legend()
plt.show()
import random
# All rows flagged as Pneumonia, whatever else they are labelled with.
p_t = all_xray_df[all_xray_df['Pneumonia'] == 1]
# Every distinct 'Finding Labels' string that occurs among those rows.
s = p_t['Finding Labels'].unique().tolist()
# Notebook display: the ten most frequent label combinations.
p_t['Finding Labels'].value_counts().nlargest(10)
# Hand-picked 'Finding Labels' values evaluated one by one below.
# NOTE: this rebinds `cols`, which earlier held the DataFrame column names.
cols = ['Pneumonia',
'Infiltration|Pneumonia',
'Edema|Infiltration|Pneumonia',
'Atelectasis|Pneumonia',
'Edema|Pneumonia',
'Effusion|Pneumonia',
'Effusion|Infiltration|Pneumonia',
'Consolidation|Pneumonia',
'Atelectasis|Infiltration|Pneumonia']
# Evaluate the reloaded model separately for each label combination in
# `cols`, against randomly chosen 'No Finding' rows.
# The 'No Finding' rows do not depend on the loop variable, so select them once.
n_t = all_xray_df[all_xray_df['No Finding'] == 1]
for r in cols:
    idx = p_t[p_t['Finding Labels'].isin([r])].index
    # Draw (with replacement) three 'No Finding' index labels per positive.
    idx2 = random.choices(n_t.index, k=(len(idx)*3))
    idx = idx.union(idx2)
    # Keep only the rows that are actually part of the validation frame.
    val_df_2 = val_df[val_df.index.isin(idx)]
    try:
        val_gen = make_val_gen(val_idg, val_df_2, 'path', 'Pneumonia', IMG_SIZE, 100)
        valX, valY = next(val_gen)
        pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
        pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
        # Ground truth first (y_true, y_pred) so fp/fn are not exchanged.
        tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
        prec = tp/(tp+fp)
        acc = accuracy_score(valY, pred_Y_binary)
        f1 = f1_score(valY, pred_Y_binary)
        sens = tp/(tp+fn)
        spec = tn/(tn+fp)
    except Exception as err:
        # Best effort: a subset that cannot be evaluated is skipped, but the
        # reason is reported instead of being silently swallowed.
        print(f'Skipping {r}: {err}')
        continue
    print(f'***************{r}***************')
    print('True Positive:', tp, end=(', '))
    print('True Negative:', tn, end=(', '))
    print('False Positive', fp, end=(', '))
    print('False Negative:', fn)
    print('Sensitivity:', sens, end=(', '))
    print('Specificity:', spec, end=(', '))
    print('Precision:', prec, end=(', '))
    # End the report line so the next header starts on a fresh line.
    print('F1 Score:', f1)
Just focusing on Sensitivity for now
# Sensitivity and precision of the reloaded model for every distinct
# 'Finding Labels' value that occurs among the Pneumonia rows.
sensitivity = {}
precision = {}
# The 'No Finding' rows do not depend on the loop variable, so select them once.
n_t = all_xray_df[all_xray_df['No Finding'] == 1]
for r in s:
    idx = p_t[p_t['Finding Labels'].isin([r])].index
    # Draw (with replacement) three 'No Finding' index labels per positive.
    idx2 = random.choices(n_t.index, k=(len(idx)*3))
    idx = idx.union(idx2)
    # Keep only the rows that are actually part of the validation frame.
    val_df_2 = val_df[val_df.index.isin(idx)]
    try:
        val_gen = make_val_gen(val_idg, val_df_2, 'path', 'Pneumonia', IMG_SIZE, 100)
        valX, valY = next(val_gen)
        pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
        pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
        # Ground truth first (y_true, y_pred) so fp/fn are not exchanged.
        tn, fp, fn, tp = confusion_matrix(valY, pred_Y_binary).ravel()
        sens = tp/(tp+fn)
        prec = tp/(tp+fp)
    except Exception as err:
        # Best effort: a subset that cannot be evaluated is skipped, but the
        # reason is reported instead of being silently swallowed.
        print(f'Skipping {r}: {err}')
        continue
    sensitivity[r] = sens
    precision[r] = prec
# Notebook display: the label combinations that produced a score.
sensitivity.keys()
# Bar chart of the sensitivity recorded for each label combination.
plt.figure(figsize=(12,9))
plt.title('Sensitivity Score')
plt.bar(sensitivity.keys(), sensitivity.values())
plt.xticks(rotation='vertical')
plt.xlabel('Finding')
plt.ylabel('Score')
# Bar chart of the precision recorded for each label combination.
plt.figure(figsize=(12,9))
plt.title('Precision Score')
plt.bar(precision.keys(), precision.values())
plt.xticks(rotation='vertical')
plt.xlabel('Finding')
plt.ylabel('Score')