Load libraries

The code below provides a skeleton for the model building & training component of your project. You can add/remove/build on code however you see fit, this is meant as a starting point.

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
%matplotlib inline
import matplotlib.pyplot as plt

## General libraries
import seaborn as sns
from itertools import chain
import pydicom
from random import sample 

## Scikit-Learn
from skimage.io import imread, imshow
from skimage import io
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, auc, roc_auc_score, roc_curve, accuracy_score, precision_recall_curve, average_precision_score

## Keras 
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential, Model, model_from_json
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.

Do some early processing of your metadata for easier model training:

## Load the NIH data to all_xray_df
all_xray_df = pd.read_csv('/data/Data_Entry_2017.csv')
all_image_paths = {os.path.basename(x): x for x in 
                   glob(os.path.join('/data','images*', '*', '*.png'))}
print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df.sample(3)

Scans found: 112120 , Total Headers 112120

all_xray_df.columns

Index(['Image Index', 'Finding Labels', 'Follow-up #', 'Patient ID',
       'Patient Age', 'Patient Gender', 'View Position', 'OriginalImage[Width',
       'Height]', 'OriginalImagePixelSpacing[x', 'y]', 'Unnamed: 11', 'path'],
      dtype='object')

Data Preprocessing - Reference EDA notebook

Create column for every disease
Remove Unnamed: 11 column
Remove Patient Age outlier data for ages above 100. Mostly these are ages 150-413 which don't make sense and obviously data entry error
Remove images for Mass and Infiltration based on the Analysis in EDA notebook showing similarity in Pixel Density Distribution with Pneumonia cases.

## Here you may want to create some extra columns in your table with binary indicators of certain diseases 
## rather than working directly with the 'Finding Labels' column

# Todo
all_labels = list(set(list(chain.from_iterable([i.split('|') for i in all_xray_df['Finding Labels']]))))
for c_label in all_labels:
    all_xray_df[c_label] = all_xray_df['Finding Labels'].map(lambda finding: 1.0 if c_label in finding else 0)
all_xray_df.sample(3)

all_xray_df.drop(columns=['Unnamed: 11'], inplace=True)

Remove Outliers in Patient Age

idx = all_xray_df[all_xray_df['Patient Age'] > 100].index.tolist()
all_xray_df.drop(index=idx, inplace=True)

# Check number of Pneumonia cases
sum(all_xray_df['Pneumonia'] == 1)

1430

all_labels

['Pneumonia',
 'Effusion',
 'Infiltration',
 'Consolidation',
 'Pleural_Thickening',
 'No Finding',
 'Edema',
 'Mass',
 'Pneumothorax',
 'Atelectasis',
 'Nodule',
 'Fibrosis',
 'Hernia',
 'Cardiomegaly',
 'Emphysema']

Remove images/rows for Diseases in `[Mass, Infiltration]`

all_xray_df.shape

(112104, 27)

# First round will use this
xray_df = all_xray_df[(all_xray_df['Mass'] == 0) & (all_xray_df['Infiltration'] == 0)]
xray_df.drop(columns=['Mass', 'Infiltration'], inplace=True)

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

xray_df.shape

(87591, 25)

sum(xray_df['Pneumonia'] == 1)

788

sum(xray_df['Pneumonia'] == 0)

86803

# Second round will use this instead
xray_df = all_xray_df.copy()

xray_df.shape

(112104, 27)

sum(xray_df['Pneumonia'] == 1)

1430

sum(xray_df['Pneumonia'] == 0)

110674

Quick Data Check

Deep analysis is done in the EDA Notebook

plt.figure(figsize=(10,6))
vals = all_xray_df[all_labels].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Total data distribution between findings')
plt.show()

plt.figure(figsize=(10,6))
vals = xray_df[xray_df.columns[12:].tolist()].sum().nlargest(15).sort_values()
colors = ['#d62728' if i =='Pneumonia' else '#1f77b4' for i in vals.index]
ax = vals.plot(kind='barh', color=colors)
ax.set(xlabel = 'Number of Images with Label')
ax.set(title='Data istribution between findings (Mass and Infiltration removed)')
plt.show()

Check on few data distributions before we split the data

xray_df['Patient Gender'].value_counts(normalize=True)

M    0.560423
F    0.439577
Name: Patient Gender, dtype: float64

First Round - after removing Mass and Infiltration

print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")

Total Pneumonia cases: 788
Total Clear cases: 60353
Total Records: 87591
-----------------
Percent Pneumonia caes: 0.008996358073318035

Second Round - Wihtout removing Mass and Infiltration

print(f"Total Pneumonia cases: {sum(xray_df['Pneumonia'] == 1)}")
print(f"Total Clear cases: {sum(xray_df['No Finding'] == 1)}")
print(f"Total Records: {len(xray_df)}")
print("-----------------")
print(f"Percent Pneumonia caes: {sum(xray_df['Pneumonia'] == 1)/len(xray_df)}")

Total Pneumonia cases: 1430
Total Clear cases: 60353
Total Records: 112104
-----------------
Percent Pneumonia caes: 0.012756012274316705

Create your training and testing data:

cols = ['path', 'Pneumonia', 'Patient Gender', 'Patient Age', 'View Position']
dt = xray_df[cols]

def create_splits(data, test_size=0.2, strat=None):
    
    ## Either build your own or use a built-in library to split your original dataframe into two sets 
    ## that can be used for training and testing your model
    ## It's important to consider here how balanced or imbalanced you want each of those sets to be
    ## for the presence of pneumonia
    
    # Todo
    train_data, val_data = train_test_split(data, 
                                            test_size = test_size, 
                                            stratify = data[strat],
                                            random_state=42)

    return train_data, val_data

# Pass our parameters to the function

train_set, val_set = create_splits(dt, 0.2, 'Pneumonia')

train_size = len(train_set)
val_size = len(val_set)

print(f"Training: {sum(train_set['Pneumonia'] == 1)/train_size}, {sum(train_set['Pneumonia'] == 0)/train_size}")
print(f"Validation:, {sum(val_set['Pneumonia'] == 1)/val_size}, {sum(val_set['Pneumonia'] == 0)/val_size} ")

Training: 0.012756040721206917, 0.987243959278793
Validation:, 0.01275589848802462, 0.9872441015119754

Research Papers and Articles regarding Imbalanced Classes in Deep Learning

Note: From the research it seems a combination of undersampling and oversampling may produce the best results. This means undersampling the majority class which in our case is the Non-Penumonia and oversampling the minority class. Possibly in the future I will need to experiment further with these techniques. For now, I will just reduce the data from the majority class to bring them to a balance.

Handling Imbalance classes

Even though with Patient Gender there is a slight imbalance as well, I think it can be neglected for now. The current technique to balance Pneumonia and Non-Pneumonia classes will chunk out a lot of data already and I would like to preserve as much data as possible for training.

(1) Training Set `50/50` split

p_idx = train_set[train_set['Pneumonia']==1].index.tolist()
np_idx = train_set[train_set['Pneumonia']==0].index.tolist()

np_sample = sample(np_idx,len(p_idx))
train_df = train_set.loc[p_idx + np_sample]

train_df.shape

(2288, 5)

Let's check the data distribution between Positive and Negative

train_size = len(train_df)

print(f"Training: {sum(train_df['Pneumonia'] == 1)/train_size}, {sum(train_df['Pneumonia'] == 0)/train_size}")

Training: 0.5, 0.5

(2) Validation Set `25/75` split

p_idx = val_set[val_set['Pneumonia']==1].index.tolist()
np_idx = val_set[val_set['Pneumonia']==0].index.tolist()

np_sample = sample(np_idx,len(p_idx)*3)
val_df = val_set.loc[p_idx + np_sample]

val_df

val_df.shape

(1144, 5)

val_size = len(val_df)

print(f"Training: {sum(val_df['Pneumonia'] == 1)/val_size}, {sum(val_df['Pneumonia'] == 0)/val_size}")

Training: 0.25, 0.75

Checking - analysis after reducing the majority non-pneumonia class

from collections import Counter

Counter(train_df.Pneumonia)

Counter({1.0: 1144, 0.0: 1144})

Counter(val_df.Pneumonia)

Counter({1.0: 286, 0.0: 858})

train_df[train_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)

M    0.583916
F    0.416084
Name: Patient Gender, dtype: float64

val_df[val_df.Pneumonia == 1]['Patient Gender'].value_counts(normalize=True)

M    0.590909
F    0.409091
Name: Patient Gender, dtype: float64

Library imblearn to investigate and revisit in the future

# !pip install -U imbalanced-learn

# import imblearn
# from imblearn.over_sampling import SMOTE
# print(imblearn.__version__)

Now we can begin our model-building & training

First suggestion: perform some image augmentation on your data

## This is the image size that VGG16 takes as input
IMG_SIZE = (224, 224)

def my_image_augmentation(horizontal_flip=False, 
                          vertical_flip=False, 
                          height_shift_range=0,
                          width_shift_range=0,
                          rotation_range=0,
                          shear_range=0,
                          zoom_range=0):
    
    ## recommendation here to implement a package like Keras' ImageDataGenerator
    ## with some of the built-in augmentations 
    
    ## keep an eye out for types of augmentation that are or are not appropriate for medical imaging data
    ## Also keep in mind what sort of augmentation is or is not appropriate for testing vs validation data
    
    ## STAND-OUT SUGGESTION: implement some of your own custom augmentation that's *not*
    ## built into something like a Keras package
    
    # Todo
    img_aug = ImageDataGenerator(rescale=1. / 255.0,
                              horizontal_flip = horizontal_flip, 
                              vertical_flip = vertical_flip, 
                              height_shift_range= height_shift_range, 
                              width_shift_range=width_shift_range, 
                              rotation_range=rotation_range, 
                              shear_range = shear_range,
                              zoom_range=zoom_range)
    
    return img_aug


def make_train_gen(train_idg, train_df, x_col, y_col, target_size, batch_size):
    
    ## Create the actual generators using the output of my_image_augmentation for your training data
    ## Suggestion here to use the flow_from_dataframe library, e.g.:
#     my_train = my_image_augmentation(horizontal_flip = True, 
#                               vertical_flip = False, 
#                               height_shift_range= 0.1, 
#                               width_shift_range=0.1, 
#                               rotation_range=20, 
#                               shear_range = 0.1,
#                               zoom_range=0.1)
    
    train_gen = train_idg.flow_from_dataframe(dataframe=train_df, 
                                         directory=None, 
                                         x_col = x_col,
                                         y_col = y_col,
                                         class_mode = 'binary',
                                         target_size = target_size, 
                                         batch_size = batch_size
                                         )
     # Todo

    return train_gen


def make_val_gen(val_idg, val_df, x_col, y_col, target_size, batch_size):

#     my_val_idg = my_image_augmentation()
    
    val_gen = val_idg.flow_from_dataframe(dataframe = val_df, 
                                             directory=None, 
                                             x_col = x_col,
                                             y_col = y_col,
                                             class_mode = 'binary',
                                             target_size = target_size, 
                                             batch_size = batch_size) 
    
    # Todo
    return val_gen

Note: Due to error requiring that the classes should be string since the class_mode is set to binary

train_df['Pneumonia'] = train_df['Pneumonia'].astype('str')
val_df['Pneumonia'] = val_df['Pneumonia'].astype('str')

train_idg = my_image_augmentation(True, False, 0.1, 0.1, 20, 0.1, 0.1)
val_idg = my_image_augmentation()

train_gen = make_train_gen(train_idg=train_idg, 
                           train_df=train_df, 
                           x_col='path', 
                           y_col='Pneumonia', 
                           target_size=IMG_SIZE, 
                           batch_size=22)
val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 22)

Found 2288 validated image filenames belonging to 2 classes.
Found 1144 validated image filenames belonging to 2 classes.

## May want to pull a single large batch of random validation data for testing after each epoch:
valX, valY = val_gen.next()

## May want to look at some examples of our augmented training data. 
## This is helpful for understanding the extent to which data is being manipulated prior to training, 
## and can be compared with how the raw data look prior to augmentation

t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize = (16, 16))
for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        c_ax.set_title('Pneumonia')
    else:
        c_ax.set_title('No Pneumonia')
    c_ax.axis('off')

Build your model:

Recommendation here to use a pre-trained network downloaded from Keras for fine-tuning

model = VGG16(include_top=True, weights='imagenet')

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467904/553467096 [==============================] - 7s 0us/step

model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________

def load_pretrained_model():
    
    model = VGG16(include_top=True, weights='imagenet')
    transfer_layer = model.get_layer('block5_pool')
    vgg_model = Model(inputs = model.input, outputs = transfer_layer.output)
    
    # Todo
    for layer in vgg_model.layers[0:-2]:
        layer.trainable = False
    
    return vgg_model

vgg_model = load_pretrained_model()

vgg_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
=================================================================
Total params: 14,714,688
Trainable params: 2,359,808
Non-trainable params: 12,354,880
_________________________________________________________________

def build_my_model(pre_trained):
    
    my_model = Sequential()

    # Add the convolutional part of the VGG16 model from above.
    my_model.add(vgg_model)

    my_model.add(Flatten())
    # Flatten the output of the VGG16 model because it is from a
    # convolutional layer.


    my_model.add(Dense(1024, activation='relu'))
    my_model.add(Dropout(0.5))

    my_model.add(Dense(512, activation='relu'))
    my_model.add(Dropout(0.5))

    my_model.add(Dense(256, activation='relu'))
    my_model.add(Dropout(0.5))

    # Final output layer

    # Add a dense (aka. fully-connected) layer.
    # This is for combining features that the VGG16 model has
    # recognized in the image.
    my_model.add(Dense(1, activation='sigmoid'))

    ## Set our optimizer, loss function, and learning rate (you can change the learning rate here if you'd like)
    ## but otherwise this cell can be run as is

    
    return my_model

model = build_my_model(vgg_model)

optimizer = Adam(lr=0.001)
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
model_2 (Model)              (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 257       
=================================================================
Total params: 41,062,209
Trainable params: 28,707,329
Non-trainable params: 12,354,880
_________________________________________________________________

## Below is some helper code that will allow you to add checkpoints to your model,
## This will save the 'best' version of your model by comparing it to previous epochs of training

## Note that you need to choose which metric to monitor for your model's 'best' performance if using this code. 
## The 'patience' parameter is set to 10, meaning that your model will train for ten epochs without seeing
## improvement before quitting

# Todo

weight_path="{}_my_model2.best.hdf5".format('xray_class')

checkpoint = ModelCheckpoint(weight_path, 
                             monitor= 'val_loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode= 'min', 
                             save_weights_only = True)

early = EarlyStopping(monitor= 'val_loss', 
                      mode= 'min', 
                      patience=10)

callbacks_list = [checkpoint, early]

Training with data less `Mass` and `Infiltration` diseases

Later we will run it again below without removing for compairosn

Rerun cells after training but skipping the removal of these two diseases

Start training!

history = model.fit_generator(train_gen, validation_data=[valX, valY], epochs=30, callbacks=callbacks_list )

Epoch 1/30
58/58 [==============================] - 37s 645ms/step - loss: 0.8703 - binary_accuracy: 0.5087 - val_loss: 0.6810 - val_binary_accuracy: 0.6364

Epoch 00001: val_loss improved from inf to 0.68097, saving model to xray_class_my_model.best.hdf5
Epoch 2/30
58/58 [==============================] - 33s 569ms/step - loss: 0.6919 - binary_accuracy: 0.5135 - val_loss: 0.7292 - val_binary_accuracy: 0.3182

Epoch 00002: val_loss did not improve from 0.68097
Epoch 3/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6929 - binary_accuracy: 0.5349 - val_loss: 0.7089 - val_binary_accuracy: 0.3636

Epoch 00003: val_loss did not improve from 0.68097
Epoch 4/30
58/58 [==============================] - 32s 559ms/step - loss: 0.6906 - binary_accuracy: 0.5349 - val_loss: 0.7905 - val_binary_accuracy: 0.3636

Epoch 00004: val_loss did not improve from 0.68097
Epoch 5/30
58/58 [==============================] - 33s 573ms/step - loss: 0.6904 - binary_accuracy: 0.5230 - val_loss: 0.6647 - val_binary_accuracy: 0.6364

Epoch 00005: val_loss improved from 0.68097 to 0.66472, saving model to xray_class_my_model.best.hdf5
Epoch 6/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6939 - binary_accuracy: 0.5190 - val_loss: 0.7036 - val_binary_accuracy: 0.3182

Epoch 00006: val_loss did not improve from 0.66472
Epoch 7/30
58/58 [==============================] - 33s 563ms/step - loss: 0.6859 - binary_accuracy: 0.5381 - val_loss: 0.8723 - val_binary_accuracy: 0.3182

Epoch 00007: val_loss did not improve from 0.66472
Epoch 8/30
58/58 [==============================] - 34s 578ms/step - loss: 0.6943 - binary_accuracy: 0.5317 - val_loss: 0.7141 - val_binary_accuracy: 0.4091

Epoch 00008: val_loss did not improve from 0.66472
Epoch 9/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6877 - binary_accuracy: 0.5214 - val_loss: 0.6806 - val_binary_accuracy: 0.5455

Epoch 00009: val_loss did not improve from 0.66472
Epoch 10/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6825 - binary_accuracy: 0.5484 - val_loss: 0.6835 - val_binary_accuracy: 0.4091

Epoch 00010: val_loss did not improve from 0.66472
Epoch 11/30
58/58 [==============================] - 33s 571ms/step - loss: 0.6948 - binary_accuracy: 0.5349 - val_loss: 0.7128 - val_binary_accuracy: 0.3636

Epoch 00011: val_loss did not improve from 0.66472
Epoch 12/30
58/58 [==============================] - 33s 572ms/step - loss: 0.6943 - binary_accuracy: 0.4952 - val_loss: 0.6914 - val_binary_accuracy: 0.3182

Epoch 00012: val_loss did not improve from 0.66472
Epoch 13/30
58/58 [==============================] - 33s 566ms/step - loss: 0.6928 - binary_accuracy: 0.5190 - val_loss: 0.6955 - val_binary_accuracy: 0.5455

Epoch 00013: val_loss did not improve from 0.66472
Epoch 14/30
58/58 [==============================] - 33s 566ms/step - loss: 0.6896 - binary_accuracy: 0.5389 - val_loss: 0.6792 - val_binary_accuracy: 0.5909

Epoch 00014: val_loss did not improve from 0.66472
Epoch 15/30
58/58 [==============================] - 33s 569ms/step - loss: 0.6909 - binary_accuracy: 0.5619 - val_loss: 0.6724 - val_binary_accuracy: 0.6364

Epoch 00015: val_loss did not improve from 0.66472

history.history.keys()

dict_keys(['val_loss', 'val_binary_accuracy', 'loss', 'binary_accuracy'])

After training for some time, look at the performance of your model by plotting some performance statistics:

Note, these figures will come in handy for your FDA documentation later in the project

!ls

'Build and train model.ipynb'   test2.dcm
 EDA.ipynb		        test3.dcm
 FDA_Submission_Template.md     test4.dcm
 grayrange.gif		        test5.dcm
 Inference.ipynb	        test6.dcm
 my_model.json		        Trial.ipynb
 sample_labels.csv	        xray_class_my_model.best.hdf5
 test1.dcm

## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists, 
## so there is no need to make the model perfect.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)

22/22 [==============================] - 0s 18ms/step

Create a binary output instead of just probability using a standard 0.5 threshold for now

pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]

def plot_auc(t_y, p_y):
    
    ## Hint: can use scikit-learn's built in functions here like roc_curve
    
    # Todo
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    fpr, tpr, thresholds = roc_curve(t_y, p_y)
    c_ax.plot(fpr, tpr, label = '%s (AUC:%0.2f)'  % ('Pneumonia', auc(fpr, tpr)))
    c_ax.legend()
    c_ax.set_xlabel('False Positive Rate')
    c_ax.set_ylabel('True Positive Rate')
    
    return

## what other performance statistics do you want to include here besides AUC? 


# def ... 
# Todo

def plot_pr(t_y, p_y):
    fig, c_ax = plt.subplots(1,1, figsize = (9, 9))
    precision, recall, thresholds = precision_recall_curve(t_y, p_y)
    c_ax.plot(precision, recall, label = '%s (AP Score:%0.2f)'  % ('Pneumonia', average_precision_score(t_y,p_y)))
    c_ax.legend()
    c_ax.set_xlabel('Recall')
    c_ax.set_ylabel('Precision')
    
#Also consider plotting the history of your model training:

def plot_history(history, kind):
    '''
    kind: either "accuracy" or "loss"
    '''
    plt.figure(figsize = (9, 9))
    kind = 'binary_accuracy' if kind == 'accuracy' else kind
    plt.plot(history.history[f'val_{kind}'], label=f'validation {kind}')
    plt.plot(history.history[f'{kind}'], label=f'training {kind}')
    plt.title(f'Validation/Training {kind}')
    plt.ylabel('EPOCHS')
    plt.legend()
    plt.show()
    
    return

plot_auc(valY, pred_Y)

## plot figures
plot_history(history, 'accuracy')
# Todo

plot_history(history, 'loss')

Once you feel you are done training, you'll need to decide the proper classification threshold that optimizes your model's performance for a given metric (e.g. accuracy, F1, precision, etc. You decide)

confusion_matrix(pred_Y_binary, valY)

array([[13,  6],
       [ 2,  1]])

tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()

accuracy_score(pred_Y_binary, valY)
f1_score(pred_Y_binary, valY)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec

0.6842105263157895

precision, recall, thresholds = precision_recall_curve(valY, pred_Y)

## Find the threshold that optimize your model's performance,
## and use that threshold to make binary classification. Make sure you take all your metrics into consideration.

# Todo

val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()

Found 632 validated image filenames belonging to 2 classes.

pred_Y = model.predict(valX, batch_size = 32, verbose = True)

100/100 [==============================] - 5s 48ms/step

## Let's look at some examples of true vs. predicted with our best model: 

# Todo

fig, m_axs = plt.subplots(10, 10, figsize = (16, 16))
i = 0
for (c_x, c_y, c_ax) in zip(valX[0:100], valY[0:100], m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        if pred_Y[i] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if pred_Y[i] > 0.5: 
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
    i=i+1

pred_Y_binary = pred_Y_binary = [1 if i[0] > 0.5 else 0 for i in pred_Y]
confusion_matrix(pred_Y_binary, valY)

array([[71, 21],
       [ 5,  3]])

acc = accuracy_score(pred_Y_binary, valY)
f1 = f1_score(pred_Y_binary, valY)
sens = tp/(tp+fn)
spec = tn/(tn+fp)
spec

0.6842105263157895

plot_auc(valY, pred_Y)

plot_pr(valY, pred_Y)

## Just save model architecture to a .json:

model_json = model.to_json()
with open("my_model.json", "w") as json_file:
    json_file.write(model_json)

!ls

'Build and train model.ipynb'   test2.dcm
 EDA.ipynb		        test3.dcm
 FDA_Submission_Template.md     test4.dcm
 grayrange.gif		        test5.dcm
 Inference.ipynb	        test6.dcm
 my_model.json		        Trial.ipynb
 sample_labels.csv	        xray_class_my_model.best.hdf5
 test1.dcm

Using saved model for further analysis and testing

json_file = open('my_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("xray_class_my_model.best.hdf5")
print("Loaded model from disk")

Loaded model from disk

loaded_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
model_1 (Model)              (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
=================================================================
Total params: 41,062,209
Trainable params: 28,707,329
Non-trainable params: 12,354,880
_________________________________________________________________

val_gen = make_val_gen(val_idg, val_df, 'path', 'Pneumonia', IMG_SIZE, 100)
valX, valY = val_gen.next()
pred_Y = loaded_model.predict(valX, batch_size = 32, verbose = True)

Found 1144 validated image filenames belonging to 2 classes.
100/100 [==============================] - 58s 582ms/step

fig, m_axs = plt.subplots(10, 10, figsize = (16, 16))
i = 0
for (c_x, c_y, c_ax) in zip(valX[0:100], valY[0:100], m_axs.flatten()):
    c_ax.imshow(c_x[:,:,0], cmap = 'bone')
    if c_y == 1: 
        if pred_Y[i] > 0.5:
            c_ax.set_title('1, 1')
        else:
            c_ax.set_title('1, 0', color='red')
    else:
        if pred_Y[i] > 0.5: 
            c_ax.set_title('0, 1', color='red')
        else:
            c_ax.set_title('0, 0')
    c_ax.axis('off')
    i=i+1

# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
for t in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    print(f'when threshold at {t} CF:')
    print(confusion_matrix(pred_Y_binary, valY))
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    print(f'\x1b[34maccuracy \x1b[0m= \x1b[31m{accuracy_score(pred_Y_binary, valY)}\x1b[0m')
    print(f'F1 Score = {f1_score(pred_Y_binary, valY)}')
    print(f'\x1b[34mSensitivity Score \x1b[0m= \x1b[31m{tp/(tp+fn)}\x1b[0m')
    print(f'\x1b[34mSpecificity Score \x1b[0m= \x1b[31m{tn/(tn+fp)}\x1b[0m')
    print('-------------------------------')

when threshold at 0.4 CF:
[[ 1  1]
 [72 26]]
accuracy = 0.27
F1 Score = 0.41600000000000004
Sensitivity Score = 0.2653061224489796
Specificity Score = 0.5
-------------------------------
when threshold at 0.5 CF:
[[59 25]
 [14  2]]
accuracy = 0.61
F1 Score = 0.09302325581395349
Sensitivity Score = 0.125
Specificity Score = 0.7023809523809523
-------------------------------
when threshold at 0.6 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.7 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.8 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------
when threshold at 0.9 CF:
[[73 27]
 [ 0  0]]
accuracy = 0.73
F1 Score = 0.0
Sensitivity Score = nan
Specificity Score = 0.73
-------------------------------

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.

# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sens = []
spec = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    sens.append(tp/(tp+fn))
    spec.append(tn/(tn+fp))

plt.figure(figsize=(10,6))
plt.plot(thresholds, sens, label='Sensitivity')
plt.plot(thresholds, spec, label='Specificity')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.legend()
plt.show()

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in long_scalars
  if __name__ == '__main__':

thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
fscore = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    fscore.append(f1_score(valY, pred_Y_binary))

plt.figure(figsize=(10,6))
plt.plot(thresholds, fscore, label='f1 score')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('F1 Score vs Threshold')
plt.legend()
plt.show()

# Test for different threshold levels
# using colors just to make it easir to read and spot numbers
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
prec = []
recall = []
for t in thresholds:
    pred_Y_binary = [1 if i[0] > t else 0 for i in pred_Y]
    tn, fp, fn, tp = confusion_matrix(pred_Y_binary, valY).ravel()
    prec.append(tp/(tp+fp))
    recall.append(tp/(tp+fn))

plt.figure(figsize=(10,6))
plt.plot(thresholds, prec, label='Precision')
plt.plot(thresholds, recall, label='Recall')
plt.xlabel('Threshold Value')
plt.ylabel('Score')
plt.title('Precision and Recall vs Threshold ')
plt.legend()
plt.show()

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in long_scalars
  # Remove the CWD from sys.path while we load stuff.

2. Redo Training

This time without removing diseases

history = model.fit_generator(train_gen, validation_data=[valX, valY], epochs=30, callbacks=callbacks_list )

Epoch 1/30
104/104 [==============================] - 65s 625ms/step - loss: 0.6983 - binary_accuracy: 0.5184 - val_loss: 0.6378 - val_binary_accuracy: 0.6818

Epoch 00001: val_loss improved from inf to 0.63784, saving model to xray_class_my_model2.best.hdf5
Epoch 2/30
104/104 [==============================] - 60s 578ms/step - loss: 0.6882 - binary_accuracy: 0.5472 - val_loss: 0.6362 - val_binary_accuracy: 0.5455

Epoch 00002: val_loss improved from 0.63784 to 0.63619, saving model to xray_class_my_model2.best.hdf5
Epoch 3/30
104/104 [==============================] - 60s 577ms/step - loss: 0.6862 - binary_accuracy: 0.5590 - val_loss: 0.6303 - val_binary_accuracy: 0.5000

Epoch 00003: val_loss improved from 0.63619 to 0.63032, saving model to xray_class_my_model2.best.hdf5
Epoch 4/30
104/104 [==============================] - 60s 574ms/step - loss: 0.6821 - binary_accuracy: 0.5629 - val_loss: 0.7177 - val_binary_accuracy: 0.2727

Epoch 00004: val_loss did not improve from 0.63032
Epoch 5/30
104/104 [==============================] - 61s 587ms/step - loss: 0.6770 - binary_accuracy: 0.5909 - val_loss: 0.6853 - val_binary_accuracy: 0.4545

Epoch 00005: val_loss did not improve from 0.63032
Epoch 6/30
104/104 [==============================] - 60s 572ms/step - loss: 0.6769 - binary_accuracy: 0.5795 - val_loss: 0.7305 - val_binary_accuracy: 0.2273

Epoch 00006: val_loss did not improve from 0.63032
Epoch 7/30
104/104 [==============================] - 59s 570ms/step - loss: 0.6777 - binary_accuracy: 0.5826 - val_loss: 0.6393 - val_binary_accuracy: 0.5000

Epoch 00007: val_loss did not improve from 0.63032
Epoch 8/30
104/104 [==============================] - 60s 574ms/step - loss: 0.6753 - binary_accuracy: 0.5743 - val_loss: 0.6945 - val_binary_accuracy: 0.4091

Epoch 00008: val_loss did not improve from 0.63032
Epoch 9/30
104/104 [==============================] - 59s 571ms/step - loss: 0.6718 - binary_accuracy: 0.6023 - val_loss: 0.6987 - val_binary_accuracy: 0.4091

Epoch 00009: val_loss did not improve from 0.63032
Epoch 10/30
104/104 [==============================] - 60s 575ms/step - loss: 0.6603 - binary_accuracy: 0.6058 - val_loss: 0.7548 - val_binary_accuracy: 0.3636

Epoch 00010: val_loss did not improve from 0.63032
Epoch 11/30
104/104 [==============================] - 60s 580ms/step - loss: 0.6691 - binary_accuracy: 0.5957 - val_loss: 0.7652 - val_binary_accuracy: 0.3182

Epoch 00011: val_loss did not improve from 0.63032
Epoch 12/30
104/104 [==============================] - 60s 573ms/step - loss: 0.6628 - binary_accuracy: 0.6031 - val_loss: 0.7235 - val_binary_accuracy: 0.3636

Epoch 00012: val_loss did not improve from 0.63032
Epoch 13/30
104/104 [==============================] - 60s 577ms/step - loss: 0.6698 - binary_accuracy: 0.5988 - val_loss: 0.7546 - val_binary_accuracy: 0.2273

Epoch 00013: val_loss did not improve from 0.63032

## After training, make some predictions to assess your model's overall performance
## Note that detecting pneumonia is hard even for trained expert radiologists, 
## so there is no need to make the model perfect.
model.load_weights(weight_path)
pred_Y = model.predict(valX, batch_size = 32, verbose = True)

22/22 [==============================] - 0s 15ms/step