Load libraries

The code below provides a skeleton for the model building & training component of your project. You can add/remove/build on code however you see fit, this is meant as a starting point.

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
%matplotlib inline
import matplotlib.pyplot as plt

## General libraries
import seaborn as sns
from itertools import chain
import pydicom
from random import sample 

## Scikit-Learn
from skimage.io import imread, imshow
from skimage import io
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, auc, roc_auc_score, roc_curve, accuracy_score, precision_recall_curve, average_precision_score

## Keras 
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential, Model, model_from_json
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16
Using TensorFlow backend.

Do some early processing of your metadata for easier model training:

## Load the NIH data to all_xray_df
all_xray_df = pd.read_csv('/data/Data_Entry_2017.csv')
all_image_paths = {os.path.basename(x): x for x in 
                   glob(os.path.join('/data','images*', '*', '*.png'))}
print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df.sample(3)
Scans found: 112120 , Total Headers 112120
Image Index Finding Labels Follow-up # Patient ID Patient Age Patient Gender View Position OriginalImage[Width Height] OriginalImagePixelSpacing[x y] Unnamed: 11 path
53332 00013467_020.png Consolidation 20 13467 32 M AP 2500 2048 0.168 0.168 NaN /data/images_006/images/00013467_020.png
50286 00012722_001.png Effusion 1 12722 63 M PA 2978 2898 0.143 0.143 NaN /data/images_006/images/00012722_001.png
6270 00001695_001.png No Finding 1 1695 59 M PA 2500 2048 0.168 0.168 NaN /data/images_002/images/00001695_001.png
all_xray_df.columns
Index(['Image Index', 'Finding Labels', 'Follow-up #', 'Patient ID',
       'Patient Age', 'Patient Gender', 'View Position', 'OriginalImage[Width',
       'Height]', 'OriginalImagePixelSpacing[x', 'y]', 'Unnamed: 11', 'path'],
      dtype='object')

Data Preprocessing - Reference EDA notebook

  • Create column for every disease
  • Remove Unnamed: 11 column
  • Remove Patient Age outlier data for ages above 100. Mostly these are ages 150-413 which don't make sense and obviously data entry error
  • Remove images for Mass and Infiltration based on the Analysis in EDA notebook showing similarity in Pixel Density Distribution with Pneumonia cases.
## Here you may want to create some extra columns in your table with binary indicators of certain diseases 
## rather than working directly with the 'Finding Labels' column

# Todo
all_labels = list(set(list(chain.from_iterable([i.split('|') for i in all_xray_df['Finding Labels']]))))
for c_label in all_labels:
    all_xray_df[c_label] = all_xray_df['Finding Labels'].map(lambda finding: 1.0 if c_label in finding else 0)
all_xray_df.sample(3)
Image Index Finding Labels Follow-up # Patient ID Patient Age Patient Gender View Position OriginalImage[Width Height] OriginalImagePixelSpacing[x ... No Finding Edema Mass Pneumothorax Atelectasis Nodule Fibrosis Hernia Cardiomegaly Emphysema
87568 00021626_012.png Mass 12 21626 55 F AP 2500 2048 0.168 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
84734 00020864_000.png Infiltration 0 20864 43 M PA 2992 2991 0.143 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
93132 00023272_001.png No Finding 1 23272 47 M AP 3056 2544 0.139 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

3 rows × 28 columns

all_xray_df.drop(columns=['Unnamed: 11'], inplace=True)

Remove Outliers in Patient Age

idx = all_xray_df[all_xray_df['Patient Age'] > 100].index.tolist()
all_xray_df.drop(index=idx, inplace=True)
# Check number of Pneumonia cases
sum(all_xray_df['Pneumonia'] == 1)
1430
all_labels
['Pneumonia',
 'Effusion',
 'Infiltration',
 'Consolidation',
 'Pleural_Thickening',
 'No Finding',
 'Edema',
 'Mass',
 'Pneumothorax',
 'Atelectasis',
 'Nodule',
 'Fibrosis',
 'Hernia',
 'Cardiomegaly',
 'Emphysema']

Remove images/rows for Diseases in [Mass, Infiltration]

all_xray_df.shape
(112104, 27)
# First round will use this
xray_df = all_xray_df[(all_xray_df['Mass'] == 0) & (all_xray_df['Infiltration'] == 0)]
xray_df.drop(columns=['Mass', 'Infiltration'], inplace=True)
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
xray_df.shape
(87591, 25)
sum(xray_df['Pneumonia'] == 1)
788
sum(xray_df['Pneumonia'] == 0)
86803
# Second round will use this instead
xray_df = all_xray_df.copy()
xray_df.shape
(112104, 27)
sum(xray_df['Pneumonia'] == 1)
1430
sum(xray_df['Pneumonia'] == 0)
110674

Quick Data Check

  • Deep analysis is done in the EDA Notebook
plt.figure(figsize=(10,6))
vals = all_xray_df[all_labels].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Total data distribution between findings')
plt.show()
plt.figure(figsize=(10,6))
vals = xray_df[xray_df.columns[12:].tolist()].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Data istribution between findings (Mass and Infiltration removed)')
plt.show()

Check on few data distributions before we split the data

xray_df['Patient Gender'].value_counts(normalize=True)
M    0.560423
F    0.439577
Name: Patient Gender, dtype: float64

First Round - after removing Mass and Infiltration

print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")
Total Pneumonia cases: 788
Total Clear cases: 60353
Total Records: 87591
-----------------
Percent Pneumonia caes: 0.008996358073318035

Second Round - Wihtout removing Mass and Infiltration

print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")
Total Pneumonia cases: 1430
Total Clear cases: 60353
Total Records: 112104
-----------------
Percent Pneumonia caes: 0.012756012274316705

Create your training and testing data:

cols = ['path', 'Pneumonia', 'Patient Gender', 'Patient Age', 'View Position']
dt = xray_df[cols]
def create_splits(data, test_size=0.2, strat=None):
    
    ## Either build your own or use a built-in library to split your original dataframe into two sets 
    ## that can be used for training and testing your model
    ## It's important to consider here how balanced or imbalanced you want each of those sets to be
    ## for the presence of pneumonia
    
    # Todo
    train_data, val_data = train_test_split(data, 
                                            test_size = test_size, 
                                            stratify = data[strat],
                                            random_state=42)

    return train_data, val_data
# Pass our parameters to the function

train_set, val_set = create_splits(dt, 0.2, 'Pneumonia')
train_size = len(train_set)
val_size = len(val_set)

print(f"Training: {sum(train_set['Pneumonia'] == 1)/train_size}, {sum(train_set['Pneumonia'] == 0)/train_size}")
print(f"Validation:, {sum(val_set['Pneumonia'] == 1)/val_size}, {sum(val_set['Pneumonia'] == 0)/val_size} ")
Training: 0.012756040721206917, 0.987243959278793
Validation:, 0.01275589848802462, 0.9872441015119754 

Research Papers and Articles regarding Imbalanced Classes in Deep Learning

  1. Survey on deep learning with class imbalance - Springer
  2. Solving Class Imbalance problem in CNN - Medium
  3. Deep Over-sampling Framework for Classifying Imbalanced Data - Paper
  4. SMOTE for Imbalanced Classification with Python

Note: From the research it seems a combination of undersampling and oversampling may produce the best results. This means undersampling the majority class which in our case is the Non-Penumonia and oversampling the minority class. Possibly in the future I will need to experiment further with these techniques. For now, I will just reduce the data from the majority class to bring them to a balance.

Handling Imbalance classes

Even though with Patient Gender there is a slight imbalance as well, I think it can be neglected for now. The current technique to balance Pneumonia and Non-Pneumonia classes will chunk out a lot of data already and I would like to preserve as much data as possible for training.

(1) Training Set 50/50 split

p_idx = train_set[train_set['Pneumonia']==1].index.tolist()
np_idx = train_set[train_set['Pneumonia']==0].index.tolist()

np_sample = sample(np_idx,len(p_idx))
train_df = train_set.loc[p_idx + np_sample]
train_df.shape
(2288, 5)

Let's check the data distribution between Positive and Negative

train_size = len(train_df)

print(f"Training: {sum(train_df['Pneumonia'] == 1)/train_size}, {sum(train_df['Pneumonia'] == 0)/train_size}")
      
Training: 0.5, 0.5

(2) Validation Set 25/75 split

p_idx = val_set[val_set['Pneumonia']==1].index.tolist()
np_idx = val_set[val_set['Pneumonia']==0].index.tolist()

np_sample = sample(np_idx,len(p_idx)*3)
val_df = val_set.loc[p_idx + np_sample]
val_df
path Pneumonia Patient Gender Patient Age View Position
77329 /data/images_009/images/00018996_002.png 1.0 M 22 AP
57152 /data/images_007/images/00014197_002.png 1.0 M 58 AP
106986 /data/images_012/images/00028874_003.png 1.0 F 68 AP
64071 /data/images_007/images/00015809_023.png 1.0 F 38 AP
42455 /data/images_005/images/00010924_003.png 1.0 F 62 PA
... ... ... ... ... ...
97576 /data/images_011/images/00025754_012.png 0.0 M 67 AP
90305 /data/images_010/images/00022454_009.png 0.0 F 54 PA
103513 /data/images_011/images/00027627_002.png 0.0 F 68 PA
95363 /data/images_011/images/00025071_001.png 0.0 M 55 PA
35314 /data/images_005/images/00009324_001.png 0.0 M 26 AP

1144 rows × 5 columns

val_df.shape
(1144, 5)
val_size = len(val_df)

print(f"Training: {sum(val_df['Pneumonia'] == 1)/val_size}, {sum(val_df['Pneumonia'] == 0)/val_size}")
      
Training: 0.25, 0.75
Checking - analysis after reducing the majority non-pneumonia class
from collections import Counter
Counter(train_df.Pneumonia)
Counter({1.0: 1144, 0.0: 1144})
Counter(val_df.Pneumonia)
Counter({1.0: 286, 0.0: 858})
train_df[train_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)
M    0.583916
F    0.416084
Name: Patient Gender, dtype: float64
val_df[val_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)
M    0.590909
F    0.409091
Name: Patient Gender, dtype: float64

Library imblearn to investigate and revisit in the future

# !pip install -U imbalanced-learn
# import imblearn
# from imblearn.over_sampling import SMOTE
# print(imblearn.__version__)

Now we can begin our model-building & training

First suggestion: perform some image augmentation on your data

## This is the image size that VGG16 takes as input
IMG_SIZE = (224, 224)
def my_image_augmentation(horizontal_flip=False, 
                          vertical_flip=False, 
                          height_shift_range=0,
                          width_shift_range=0,
                          rotation_range=0,
                          shear_range=0,
                          zoom_range=0):
    
    ## recommendation here to implement a package like Keras' ImageDataGenerator
    ## with some of the built-in augmentations 
    
    ## keep an eye out for types of augmentation that are or are not appropriate for medical imaging data
    ## Also keep in mind what sort of augmentation is or is not appropriate for testing vs validation data
    
    ## STAND-OUT SUGGESTION: implement some of your own custom augmentation that's *not*
    ## built into something like a Keras package
    
    # Todo
    img_aug = ImageDataGenerator(rescale=1. / 255.0,
                              horizontal_flip = horizontal_flip, 
                              vertical_flip = vertical_flip, 
                              height_shift_range= height_shift_range, 
                              width_shift_range=width_shift_range, 
                              rotation_range=rotation_range, 
                              shear_range = shear_range,
                              zoom_range=zoom_range)
    
    return img_aug


def make_train_gen(train_idg, train_df, x_col, y_col, target_size, batch_size):
    
    ## Create the actual generators using the output of my_image_augmentation for your training data
    ## Suggestion here to use the flow_from_dataframe library, e.g.:
#     my_train = my_image_augmentation(horizontal_flip = True, 
#                               vertical_flip = False, 
#                               height_shift_range= 0.1, 
#                               width_shift_range=0.1, 
#                               rotation_range=20, 
#                               shear_range = 0.1,
#                               zoom_range=0.1)
    
    train_gen = train_idg.flow_from_dataframe(dataframe=train_df, 
                                         directory=None, 
                                         x_col = x_col,
                                         y_col = y_col,
                                         class_mode = 'binary',
                                         target_size = target_size, 
                                         batch_size = batch_size
                                         )
     # Todo

    return train_gen


def make_val_gen(val_idg, val_df, x_col, y_col, target_size, batch_size):

#     my_val_idg = my_image_augmentation()
    
    val_gen = val_idg.flow_from_dataframe(dataframe = val_df, 
                                             directory=None, 
                                             x_col = x_col,
                                             y_col = y_col,
                                             class_mode = 'binary',
                                             target_size = target_size, 
                                             batch_size = batch_size) 
    
    # Todo
    return val_gen

Note: Due to error requiring that the classes should be string since the class_mode is set to binary

train_df['Pneumonia'] = train_df['Pneumonia'].astype('str')
val_df['Pneumonia'] = val_df['Pneumonia'].astype('str')
train_idg = my_image_augmentation(True, False, 0.1, 0.1, 20, 0.1, 0.1)
val_idg = my_image_augmentation()
train_gen = make_train_gen(train_idg=train_idg, 
                           train_df=train_df, 
                           x_col='path', 
                           y_col='Pneumonia', 
                           target_size=IMG_SIZE, 
                           batch_size=22)
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 22)
Found 2288 validated image filenames belonging to 2 classes.
Found 1144 validated image filenames belonging to 2 classes.
## May want to pull a single large batch of random validation data for testing after each epoch:
valX, valY = val_gen.next()
## May want to look at some examples of our augmented training data. 
## This is helpful for understanding the extent to which data is being manipulated prior to training, 
## and can be compared with how the raw data look prior to augmentation

t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize = (16, 16))
for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        c_ax.set_title('Pneumonia')
    else:
        c_ax.set_title('No Pneumonia')
    c_ax.axis('off')

Build your model:

Recommendation here to use a pre-trained network downloaded from Keras for fine-tuning

model = VGG16(include_top=True, weights='imagenet')
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467904/553467096 [==============================] - 7s 0us/step
model.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________
def load_pretrained_model():
    
    model = VGG16(include_top=True, weights='imagenet')
    transfer_layer = model.get_layer('block5_pool')
    vgg_model = Model(inputs = model.input, outputs = transfer_layer.output)
    
    # Todo
    for layer in vgg_model.layers[0:-2]:
        layer.trainable = False
    
    return vgg_model
vgg_model = load_pretrained_model()
vgg_model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
=================================================================
Total params: 14,714,688
Trainable params: 2,359,808
Non-trainable params: 12,354,880
_________________________________________________________________
def build_my_model(pre_trained):
    
    my_model = Sequential()

    # Add the convolutional part of the VGG16 model from above.
    my_model.add(vgg_model)

    my_model.add(Flatten())
    # Flatten the output of the VGG16 model because it is from a
    # convolutional layer.


    my_model.add(Dense(1024, activation='relu'))
    my_model.add(Dropout(0.5))

    my_model.add(Dense(512, activation='relu'))
    my_model.add(Dropout(0.5))

    my_model.add(Dense(256, activation='relu'))
    my_model.add(Dropout(0.5))

    # Final output layer

    # Add a dense (aka. fully-connected) layer.
    # This is for combining features that the VGG16 model has
    # recognized in the image.
    my_model.add(Dense(1, activation='sigmoid'))

    ## Set our optimizer, loss function, and learning rate (you can change the learning rate here if you'd like)
    ## but otherwise this cell can be run as is

    
    return my_model
model = build_my_model(vgg_model)
optimizer = Adam(lr=0.001)
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
model_2 (Model)              (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 257       
=================================================================
Total params: 41,062,209
Trainable params: 28,707,329
Non-trainable params: 12,354,880
_________________________________________________________________
## Below is some helper code that will allow you to add checkpoints to your model,
## This will save the 'best' version of your model by comparing it to previous epochs of training

## Note that you need to choose which metric to monitor for your model's 'best' performance if using this code. 
## The 'patience' parameter is set to 10, meaning that your model will train for ten epochs without seeing
## improvement before quitting

# Todo

weight_path="{}_my_model2.best.hdf5".format('xray_class')

checkpoint = ModelCheckpoint(weight_path, 
                             monitor= 'val_loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode= 'min', 
                             save_weights_only = True)

early = EarlyStopping(monitor= 'val_loss', 
                      mode= 'min', 
                      patience=10)

callbacks_list = [checkpoint, early]

Training with data less Mass and Infiltration diseases

Later we will run it again below without removing for compairosn

  • Rerun cells after training but skipping the removal of these two diseases

Start training!

history = model.fit_generator(train_gen, validation_data=[valX, valY], epochs=30, callbacks=callbacks_list )
Epoch 1/30
58/58 [==============================] - 37s 645ms/step - loss: 0.8703 - binary_accuracy: 0.5087 - val_loss: 0.6810 - val_binary_accuracy: 0.6364

Epoch 00001: val_loss improved from inf to 0.68097, saving model to xray_class_my_model.best.hdf5
Epoch 2/30
58/58 [==============================] - 33s 569ms/step - loss: 0.6919 - binary_accuracy: 0.5135 - val_loss: 0.7292 - val_binary_accuracy: 0.3182

Epoch 00002: val_loss did not improve from 0.68097
Epoch 3/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6929 - binary_accuracy: 0.5349 - val_loss: 0.7089 - val_binary_accuracy: 0.3636

Epoch 00003: val_loss did not improve from 0.68097
Epoch 4/30
58/58 [==============================] - 32s 559ms/step - loss: 0.6906 - binary_accuracy: 0.5349 - val_loss: 0.7905 - val_binary_accuracy: 0.3636

Epoch 00004: val_loss did not improve from 0.68097
Epoch 5/30
58/58 [==============================] - 33s 573ms/step - loss: 0.6904 - binary_accuracy: 0.5230 - val_loss: 0.6647 - val_binary_accuracy: 0.6364

Epoch 00005: val_loss improved from 0.68097 to 0.66472, saving model to xray_class_my_model.best.hdf5
Epoch 6/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6939 - binary_accuracy: 0.5190 - val_loss: 0.7036 - val_binary_accuracy: 0.3182

Epoch 00006: val_loss did not improve from 0.66472
Epoch 7/30
58/58 [==============================] - 33s 563ms/step - loss: 0.6859 - binary_accuracy: 0.5381 - val_loss: 0.8723 - val_binary_accuracy: 0.3182

Epoch 00007: val_loss did not improve from 0.66472
Epoch 8/30
58/58 [==============================] - 34s 578ms/step - loss: 0.6943 - binary_accuracy: 0.5317 - val_loss: 0.7141 - val_binary_accuracy: 0.4091

Epoch 00008: val_loss did not improve from 0.66472
Epoch 9/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6877 - binary_accuracy: 0.5214 - val_loss: 0.6806 - val_binary_accuracy: 0.5455

Epoch 00009: val_loss did not improve from 0.66472
Epoch 10/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6825 - binary_accuracy: 0.5484 - val_loss: 0.6835 - val_binary_accuracy: 0.4091

Epoch 00010: val_loss did not improve from 0.66472
Epoch 11/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6948 - binary_accuracy: 0.5349 - val_loss: 0.7128 - val_binary_accuracy: 0.3636

Epoch 00011: val_loss did not improve from 0.66472
Epoch 12/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6943 - binary_accuracy: 0.4952 - val_loss: 0.6914 - val_binary_accuracy: 0.3182

Epoch 00012: val_loss did not improve from 0.66472
Epoch 13/30
58/58 [==============================] - 33s 566ms/step - loss: 0.6928 - binary_accuracy: 0.5190 - val_loss: 0.6955 - val_binary_accuracy: 0.5455

Epoch 00013: val_loss did not improve from 0.66472
Epoch 14/30
58/58 [==============================] - 33s 566ms/step - loss: 0.6896 - binary_accuracy: 0.5389 - val_loss: 0.6792 - val_binary_accuracy: 0.5909

Epoch 00014: val_loss did not improve from 0.66472
Epoch 15/30
58/58 [==============================] - 33s 569ms/step - loss: 0.6909 - binary_accuracy: 0.5619 - val_loss: 0.6724 - val_binary_accuracy: 0.6364

Epoch 00015: val_loss did not improve from 0.66472
history.history.keys()
dict_keys(['val_loss', 'val_binary_accuracy', 'loss', 'binary_accuracy'])
After training for some time, look at the performance of your model by plotting some performance statistics:

Note, these figures will come in handy for your FDA documentation later in the project

!ls
'Build and train model.ipynb'   test2.dcm
 EDA.ipynb		        test3.dcm
 FDA_Submission_Template.md     test4.dcm
 grayrange.gif		        test5.dcm
 Inference.ipynb	        test6.dcm
 my_model.json		        Trial.ipynb
 sample_labels.csv	        xray_class_my_model.best.hdf5
 test1.dcm
## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists, 
## so there is no need to make the model perfect.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
22/22 [==============================] - 0s 18ms/step

Create a binary output instead of just probability using a standard 0.5 threshold for now

pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
def plot_auc(t_y, p_y):
    
    ## Hint: can use scikit-learn's built in functions here like roc_curve
    
    # Todo
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    fpr, tpr, thresholds = roc_curve(t_y, p_y)
    c_ax.plot(fpr, tpr, label = '%s (AUC:%0.2f)'  % ('Pneumonia', auc(fpr, tpr)))
    c_ax.legend()
    c_ax.set_xlabel('False Positive Rate')
    c_ax.set_ylabel('True Positive Rate')
    
    return

## what other performance statistics do you want to include here besides AUC? 


# def ... 
# Todo

def plot_pr(t_y, p_y):
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    precision, recall, thresholds = precision_recall_curve(t_y, p_y)
    c_ax.plot(precision, recall, label = '%s (AP Score:%0.2f)'  % ('Pneumonia', average_precision_score(t_y,p_y)))
    c_ax.legend()
    c_ax.set_xlabel('Recall')
    c_ax.set_ylabel('Precision')
    
#Also consider plotting the history of your model training:

def plot_history(history, kind):
    '''
    kind: either "accuracy" or "loss"
    '''
    plt.figure(figsize = (9, 9))
    kind = 'binary_accuracy' if kind == 'accuracy' else kind
    plt.plot(history.history[f'val_{kind}'], label=f'validation {kind}')
    plt.plot(history.history[f'{kind}'], label=f'training {kind}')
    plt.title(f'Validation/Training {kind}')
    plt.ylabel('EPOCHS')
    plt.legend()
    plt.show()
    
    return
plot_auc(valY, pred_Y)
## plot figures
plot_history(history, 'accuracy')
# Todo
plot_history(history, 'loss')

Once you feel you are done training, you'll need to decide the proper classification threshold that optimizes your model's performance for a given metric (e.g. accuracy, F1, precision, etc. You decide)

confusion_matrix(pred_Y_binary, valY)
array([[13,  6],
       [ 2,  1]])
tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
accuracy_score(pred_Y_binary, valY)
f1_score(pred_Y_binary, valY)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
0.6842105263157895
precision, recall, thresholds = precision_recall_curve(valY, pred_Y)
## Find the threshold that optimize your model's performance,
## and use that threshold to make binary classification. Make sure you take all your metrics into consideration.

# Todo
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
Found 632 validated image filenames belonging to 2 classes.
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
100/100 [==============================] - 5s 48ms/step
## Let's look at some examples of true vs. predicted with our best model: 

# Todo

fig, m_axs = plt.subplots(10, 10, figsize = (16, 16))
i = 0
for (c_x, c_y, c_ax) in zip(valX[0:100], valY[0:100], m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        if pred_Y[i] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if pred_Y[i] > 0.5: 
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
    i=i+1
pred_Y_binary = pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
confusion_matrix(pred_Y_binary, valY)
array([[71, 21],
       [ 5,  3]])
acc = accuracy_score(pred_Y_binary, valY)
f1 = f1_score(pred_Y_binary, valY)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
0.6842105263157895
plot_auc(valY, pred_Y)
plot_pr(valY, pred_Y)
## Just save model architecture to a .json:

model_json = model.to_json()
with open("my_model.json", "w") as json_file:
    json_file.write(model_json)
!ls
'Build and train model.ipynb'   test2.dcm
 EDA.ipynb		        test3.dcm
 FDA_Submission_Template.md     test4.dcm
 grayrange.gif		        test5.dcm
 Inference.ipynb	        test6.dcm
 my_model.json		        Trial.ipynb
 sample_labels.csv	        xray_class_my_model.best.hdf5
 test1.dcm

Using saved model for further analysis and testing

json_file = open('my_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("xray_class_my_model.best.hdf5")
print("Loaded model from disk")
Loaded model from disk
loaded_model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
model_1 (Model)              (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
=================================================================
Total params: 41,062,209
Trainable params: 28,707,329
Non-trainable params: 12,354,880
_________________________________________________________________
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
Found 1144 validated image filenames belonging to 2 classes.
100/100 [==============================] - 58s 582ms/step
fig, m_axs = plt.subplots(10, 10, figsize = (16, 16))
i = 0
for (c_x, c_y, c_ax) in zip(valX[0:100], valY[0:100], m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        if pred_Y[i] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if pred_Y[i] > 0.5: 
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
    i=i+1
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
for t in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    print(f'when threshold at {t} CF:')
    print(confusion_matrix(pred_Y_binary, valY))
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    print(f'\x1b[34maccuracy \x1b[0m= \x1b[31m{accuracy_score(pred_Y_binary, valY)}\x1b[0m')
    print(f'F1 Score = {f1_score(pred_Y_binary, valY)}')
    print(f'\x1b[34mSensitivity Score \x1b[0m= \x1b[31m{tp/(tp+fn)}\x1b[0m')
    print(f'\x1b[34mSpecificity Score \x1b[0m= \x1b[31m{tn/(tn+fp)}\x1b[0m')
    print('-------------------------------')
when threshold at 0.4 CF:
[[ 1  1]
 [72 26]]
accuracy = 0.27
F1 Score = 0.41600000000000004
Sensitivity Score = 0.2653061224489796
Specificity Score = 0.5
-------------------------------
when threshold at 0.5 CF:
[[59 25]
 [14  2]]
accuracy = 0.61
F1 Score = 0.09302325581395349
Sensitivity Score = 0.125
Specificity Score = 0.7023809523809523
-------------------------------
when threshold at 0.6 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.7 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.8 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.9 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sens = []
spec = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    sens.append(tp/(tp+fn))
    spec.append(tn/(tn+fp))

plt.figure(figsize=(10,6))
plt.plot(thresholds, sens, label='Sensitivity')
plt.plot(thresholds, spec, label='Specificity')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in long_scalars
  if __name__ == '__main__':
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
fscore = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    fscore.append(f1_score(valY, pred_Y_binary))

plt.figure(figsize=(10,6))
plt.plot(thresholds, fscore, label='f1 score')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('F1 Score vs Threshold')
plt.legend()
plt.show()
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
prec = []
recall = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    prec.append(tp/(tp+fp))
    recall.append(tp/(tp+fn))

plt.figure(figsize=(10,6))
plt.plot(thresholds, prec, label='Precision')
plt.plot(thresholds, recall, label='Recall')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('Precision and Recall vs Threshold ')
plt.legend()
plt.show()
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.

2. Redo Training

This time without removing diseases

history = model.fit_generator(train_gen, validation_data=[valX, valY], epochs=30, callbacks=callbacks_list )
Epoch 1/30
104/104 [==============================] - 65s 625ms/step - loss: 0.6983 - binary_accuracy: 0.5184 - val_loss: 0.6378 - val_binary_accuracy: 0.6818

Epoch 00001: val_loss improved from inf to 0.63784, saving model to xray_class_my_model2.best.hdf5
Epoch 2/30
104/104 [==============================] - 60s 578ms/step - loss: 0.6882 - binary_accuracy: 0.5472 - val_loss: 0.6362 - val_binary_accuracy: 0.5455

Epoch 00002: val_loss improved from 0.63784 to 0.63619, saving model to xray_class_my_model2.best.hdf5
Epoch 3/30
104/104 [==============================] - 60s 577ms/step - loss: 0.6862 - binary_accuracy: 0.5590 - val_loss: 0.6303 - val_binary_accuracy: 0.5000

Epoch 00003: val_loss improved from 0.63619 to 0.63032, saving model to xray_class_my_model2.best.hdf5
Epoch 4/30
104/104 [==============================] - 60s 574ms/step - loss: 0.6821 - binary_accuracy: 0.5629 - val_loss: 0.7177 - val_binary_accuracy: 0.2727

Epoch 00004: val_loss did not improve from 0.63032
Epoch 5/30
104/104 [==============================] - 61s 587ms/step - loss: 0.6770 - binary_accuracy: 0.5909 - val_loss: 0.6853 - val_binary_accuracy: 0.4545

Epoch 00005: val_loss did not improve from 0.63032
Epoch 6/30
104/104 [==============================] - 60s 572ms/step - loss: 0.6769 - binary_accuracy: 0.5795 - val_loss: 0.7305 - val_binary_accuracy: 0.2273

Epoch 00006: val_loss did not improve from 0.63032
Epoch 7/30
104/104 [==============================] - 59s 570ms/step - loss: 0.6777 - binary_accuracy: 0.5826 - val_loss: 0.6393 - val_binary_accuracy: 0.5000

Epoch 00007: val_loss did not improve from 0.63032
Epoch 8/30
104/104 [==============================] - 60s 574ms/step - loss: 0.6753 - binary_accuracy: 0.5743 - val_loss: 0.6945 - val_binary_accuracy: 0.4091

Epoch 00008: val_loss did not improve from 0.63032
Epoch 9/30
104/104 [==============================] - 59s 571ms/step - loss: 0.6718 - binary_accuracy: 0.6023 - val_loss: 0.6987 - val_binary_accuracy: 0.4091

Epoch 00009: val_loss did not improve from 0.63032
Epoch 10/30
104/104 [==============================] - 60s 575ms/step - loss: 0.6603 - binary_accuracy: 0.6058 - val_loss: 0.7548 - val_binary_accuracy: 0.3636

Epoch 00010: val_loss did not improve from 0.63032
Epoch 11/30
104/104 [==============================] - 60s 580ms/step - loss: 0.6691 - binary_accuracy: 0.5957 - val_loss: 0.7652 - val_binary_accuracy: 0.3182

Epoch 00011: val_loss did not improve from 0.63032
Epoch 12/30
104/104 [==============================] - 60s 573ms/step - loss: 0.6628 - binary_accuracy: 0.6031 - val_loss: 0.7235 - val_binary_accuracy: 0.3636

Epoch 00012: val_loss did not improve from 0.63032
Epoch 13/30
104/104 [==============================] - 60s 577ms/step - loss: 0.6698 - binary_accuracy: 0.5988 - val_loss: 0.7546 - val_binary_accuracy: 0.2273

Epoch 00013: val_loss did not improve from 0.63032
## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists, 
## so there is no need to make the model perfect.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
22/22 [==============================] - 0s 15ms/step

Create a binary output instead of just probability using a standard 0.5 threshold for now

pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
def plot_auc(t_y, p_y):
    
    ## Hint: can use scikit-learn's built in functions here like roc_curve
    
    # Todo
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    fpr, tpr, thresholds = roc_curve(t_y, p_y)
    c_ax.plot(fpr, tpr, label = '%s (AUC:%0.2f)'  % ('Pneumonia', auc(fpr, tpr)))
    c_ax.legend()
    c_ax.set_xlabel('False Positive Rate')
    c_ax.set_ylabel('True Positive Rate')
    
    return

## what other performance statistics do you want to include here besides AUC? 


# def ... 
# Todo

def plot_pr(t_y, p_y):
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    precision, recall, thresholds = precision_recall_curve(t_y, p_y)
    c_ax.plot(precision, recall, label = '%s (AP Score:%0.2f)'  % ('Pneumonia', average_precision_score(t_y,p_y)))
    c_ax.legend()
    c_ax.set_xlabel('Recall')
    c_ax.set_ylabel('Precision')
    
#Also consider plotting the history of your model training:

def plot_history(history, kind):
    '''
    kind: either "accuracy" or "loss"
    '''
    plt.figure(figsize = (9, 9))
    kind = 'binary_accuracy' if kind == 'accuracy' else kind
    plt.plot(history.history[f'val_{kind}'], label=f'validation {kind}')
    plt.plot(history.history[f'{kind}'], label=f'training {kind}')
    plt.title(f'Validation/Training {kind}')
    plt.ylabel('EPOCHS')
    plt.legend()
    plt.show()
    
    return
plot_auc(valY, pred_Y)
## plot figures
plot_history(history, 'accuracy')
# Todo
plot_history(history, 'loss')

Once you feel you are done training, you'll need to decide the proper classification threshold that optimizes your model's performance for a given metric (e.g. accuracy, F1, precision, etc. You decide)

confusion_matrix(pred_Y_binary, valY)
array([[9, 2],
       [9, 2]])
tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
precision, recall, thresholds = precision_recall_curve(valY, pred_Y)
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
Found 1144 validated image filenames belonging to 2 classes.
pred_Y = model.predict(valX, batch_size = 32, verbose = True)
100/100 [==============================] - 1s 9ms/step
pred_Y_binary = pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
confusion_matrix(pred_Y_binary, valY)
array([[45, 10],
       [32, 13]])
acc = accuracy_score(pred_Y_binary, valY)
f1 = f1_score(pred_Y_binary, valY)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec
0.8181818181818182
plot_auc(valY, pred_Y)
plot_pr(valY, pred_Y)
## Just save model architecture to a .json:

model_json = model.to_json()
with open("my_model2.json", "w") as json_file:
    json_file.write(model_json)
json_file = open('my_model2.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("xray_class_my_model2.best.hdf5")
print("Loaded model from disk")
Loaded model from disk
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
Found 1144 validated image filenames belonging to 2 classes.
100/100 [==============================] - 58s 584ms/step
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
for t in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    print(f'when threshold at {t} CF:')
    print(confusion_matrix(pred_Y_binary, valY))
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    print(f'\x1b[34maccuracy \x1b[0m= \x1b[31m{accuracy_score(pred_Y_binary, valY)}\x1b[0m')
    print(f'F1 Score = {f1_score(pred_Y_binary, valY)}')
    print(f'\x1b[34mSensitivity Score \x1b[0m= \x1b[31m{tp/(tp+fn)}\x1b[0m')
    print(f'\x1b[34mSpecificity Score \x1b[0m= \x1b[31m{tn/(tn+fp)}\x1b[0m')
    print('-------------------------------')
when threshold at 0.4 CF:
[[32  5]
 [47 16]]
accuracy = 0.48
F1 Score = 0.38095238095238093
Sensitivity Score = 0.25396825396825395
Specificity Score = 0.8648648648648649
-------------------------------
when threshold at 0.5 CF:
[[47 10]
 [32 11]]
accuracy = 0.58
F1 Score = 0.34375
Sensitivity Score = 0.2558139534883721
Specificity Score = 0.8245614035087719
-------------------------------
when threshold at 0.6 CF:
[[79 21]
 [ 0  0]]
accuracy = 0.79
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.79
-------------------------------
when threshold at 0.7 CF:
[[79 21]
 [ 0  0]]
accuracy = 0.79
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.79
-------------------------------
when threshold at 0.8 CF:
[[79 21]
 [ 0  0]]
accuracy = 0.79
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.79
-------------------------------
when threshold at 0.9 CF:
[[79 21]
 [ 0  0]]
accuracy = 0.79
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.79
-------------------------------
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sens = []
spec = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    sens.append(tp/(tp+fn))
    spec.append(tn/(tn+fp))

plt.figure(figsize=(10,6))
plt.plot(thresholds, sens, label='Sensitivity')
plt.plot(thresholds, spec, label='Specificity')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in long_scalars
  if __name__ == '__main__':
fscore = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    fscore.append(f1_score(valY, pred_Y_binary))

plt.figure(figsize=(10,6))
plt.plot(thresholds, fscore, label='f1 score')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('F1 Score vs Threshold')
plt.legend()
plt.show()
# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
prec = []
recall = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    prec.append(tp/(tp+fp))
    recall.append(tp/(tp+fn))

plt.figure(figsize=(10,6))
plt.plot(thresholds, prec, label='Precision')
plt.plot(thresholds, recall, label='Recall')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('Precision and Recall vs Threshold ')
plt.legend()
plt.show()
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.

Additional Testing

  • Test how the model scores when Pneumonia is existint with other diseases
import random
p_t = all_xray_df[all_xray_df['Pneumonia'] == 1]
s = p_t['Finding Labels'].unique().tolist()
p_t['Finding Labels'].value_counts().nlargest(10)
Pneumonia                             322
Infiltration|Pneumonia                199
Edema|Infiltration|Pneumonia          137
Atelectasis|Pneumonia                 108
Edema|Pneumonia                        83
Effusion|Pneumonia                     53
Effusion|Infiltration|Pneumonia        42
Consolidation|Pneumonia                36
Atelectasis|Infiltration|Pneumonia     34
Atelectasis|Effusion|Pneumonia         23
Name: Finding Labels, dtype: int64
cols = ['Pneumonia', 
        'Infiltration|Pneumonia',
        'Edema|Infiltration|Pneumonia',
        'Atelectasis|Pneumonia',
        'Edema|Pneumonia',
        'Effusion|Pneumonia',
       'Effusion|Infiltration|Pneumonia',
       'Consolidation|Pneumonia',
       'Atelectasis|Infiltration|Pneumonia']
for r in cols:
    idx = p_t[p_t['Finding Labels'].isin([r])].index
    n_t = all_xray_df[all_xray_df['No Finding'] == 1]
    idx2 = n_t.index
    idx2 = random.choices(idx2, k=(len(idx)*3))
    idx = idx.union(idx2)
    val_df_2 = val_df[val_df.index.isin(idx)]
    try:
        val_gen = make_val_gen(val_idg, val_df_2, 'path', 'Pneumonia', IMG_SIZE, 100)
        valX, valY = val_gen.next()
        pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
        pred_Y_binary = pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
        tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
        prec = tp/(tp+fp)
        acc = accuracy_score(pred_Y_binary, valY)
        f1 = f1_score(pred_Y_binary, valY)
        sens = tp/(tp+fn)
        spec = tn/(tn+fp)
        print(f'***************{r}***************')
        print('True Positive:', tp, end=(', '))
        print('True Negative:', tn, end=(', '))
        print('False Positive', fp, end=(', '))
        print('False Negative:', fn)
        print('Sensitivity:', sens, end=(', '))
        print('Specificity:', spec, end=(', '))
        print('Precision:', prec, end=(', '))
        print('F1 Score:', f1, end=(', '))    
    except:
        continue
    
Found 77 validated image filenames belonging to 2 classes.
77/77 [==============================] - 45s 583ms/step
***************Pneumonia***************
True Positive: 35, True Negative: 2, False Positive 37, False Negative: 3
Sensitivity: 0.9210526315789473, Specificity: 0.05128205128205128, Precision: 0.4861111111111111, F1 Score: 0.6363636363636362, Found 50 validated image filenames belonging to 2 classes.
50/50 [==============================] - 29s 576ms/step
***************Infiltration|Pneumonia***************
True Positive: 25, True Negative: 2, False Positive 17, False Negative: 6
Sensitivity: 0.8064516129032258, Specificity: 0.10526315789473684, Precision: 0.5952380952380952, F1 Score: 0.684931506849315, Found 25 validated image filenames belonging to 2 classes.
25/25 [==============================] - 14s 575ms/step
***************Edema|Infiltration|Pneumonia***************
True Positive: 16, True Negative: 5, False Positive 4, False Negative: 0
Sensitivity: 1.0, Specificity: 0.5555555555555556, Precision: 0.8, F1 Score: 0.888888888888889, Found 23 validated image filenames belonging to 2 classes.
23/23 [==============================] - 13s 581ms/step
***************Atelectasis|Pneumonia***************
True Positive: 8, True Negative: 3, False Positive 10, False Negative: 2
Sensitivity: 0.8, Specificity: 0.23076923076923078, Precision: 0.4444444444444444, F1 Score: 0.5714285714285714, Found 21 validated image filenames belonging to 2 classes.
21/21 [==============================] - 12s 573ms/step
***************Edema|Pneumonia***************
True Positive: 14, True Negative: 3, False Positive 4, False Negative: 0
Sensitivity: 1.0, Specificity: 0.42857142857142855, Precision: 0.7777777777777778, F1 Score: 0.8750000000000001, Found 15 validated image filenames belonging to 2 classes.
15/15 [==============================] - 9s 578ms/step
***************Effusion|Infiltration|Pneumonia***************
True Positive: 9, True Negative: 2, False Positive 4, False Negative: 0
Sensitivity: 1.0, Specificity: 0.3333333333333333, Precision: 0.6923076923076923, F1 Score: 0.8181818181818181, 

Just focusing on Sensitivity for now

sensitivity = {}
precision = {}

for r in s:
    idx = p_t[p_t['Finding Labels'].isin([r])].index
    n_t = all_xray_df[all_xray_df['No Finding'] == 1]
    idx2 = n_t.index
    idx2 = random.choices(idx2, k=(len(idx)*3))
    idx = idx.union(idx2)
    val_df_2 = val_df[val_df.index.isin(idx)]
    try:
        val_gen = make_val_gen(val_idg, val_df_2, 'path', 'Pneumonia', IMG_SIZE, 100)
        valX, valY = val_gen.next()
        pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)
        pred_Y_binary = pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
        tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
        sens = tp/(tp+fn)  
        sensitivity[r] = sens
        prec = tp/(tp+fp)
        precision[r] = prec
        
        
    except:
        continue
Found 80 validated image filenames belonging to 2 classes.
80/80 [==============================] - 47s 590ms/step
Found 6 validated image filenames belonging to 2 classes.
6/6 [==============================] - 3s 581ms/step
Found 50 validated image filenames belonging to 2 classes.
50/50 [==============================] - 29s 586ms/step
Found 5 validated image filenames belonging to 2 classes.
5/5 [==============================] - 3s 571ms/step
Found 22 validated image filenames belonging to 2 classes.
22/22 [==============================] - 13s 582ms/step
Found 4 validated image filenames belonging to 2 classes.
4/4 [==============================] - 2s 577ms/step
Found 10 validated image filenames belonging to 2 classes.
10/10 [==============================] - 6s 585ms/step
Found 5 validated image filenames belonging to 2 classes.
5/5 [==============================] - 3s 568ms/step
Found 4 validated image filenames belonging to 2 classes.
4/4 [==============================] - 2s 582ms/step
Found 3 validated image filenames belonging to 2 classes.
3/3 [==============================] - 2s 581ms/step
Found 6 validated image filenames belonging to 2 classes.
6/6 [==============================] - 3s 576ms/step
Found 3 validated image filenames belonging to 2 classes.
3/3 [==============================] - 2s 589ms/step
Found 7 validated image filenames belonging to 2 classes.
7/7 [==============================] - 4s 582ms/step
sensitivity.keys()
dict_keys(['Nodule|Pneumonia', 'Pneumonia', 'Atelectasis|Infiltration|Pneumonia', 'Infiltration|Pneumonia', 'Edema|Infiltration|Pneumonia', 'Edema|Effusion|Pneumonia', 'Edema|Effusion|Infiltration|Pneumonia', 'Effusion|Pneumonia', 'Effusion|Infiltration|Pneumonia', 'Edema|Pneumonia', 'Infiltration|Pleural_Thickening|Pneumonia', 'Atelectasis|Consolidation|Pneumonia'])
plt.figure(figsize=(12,9))
plt.title('Sensitivity Score')
plt.bar(sensitivity.keys(), sensitivity.values())
plt.xticks(rotation='vertical')
plt.xlabel('Finding')
plt.ylabel('Score')
Text(0, 0.5, 'Score')
plt.figure(figsize=(12,9))
plt.title('Precision Score')
plt.bar(precision.keys(), precision.values())
plt.xticks(rotation='vertical')
plt.xlabel('Finding')
plt.ylabel('Score')
Text(0, 0.5, 'Score')