閱讀文章 - 精華區 NTUCH-HW

# Bone-age regression, part 1: load the bone-age training CSV, rebalance it
# by age bin and sex, and build the Keras image generators that feed
# training and validation.
from __future__ import print_function  # must be the first statement in the module

import os
from glob import glob
from itertools import chain

import keras
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from skimage.io import imread
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Load the label table and derive the image path of every scan.
# ---------------------------------------------------------------------------
base_bone_dir = 'E://temp'
age_df = pd.read_csv(os.path.join(base_bone_dir, 'boneage-training-dataset.csv'))
age_df['path'] = age_df['id'].map(
    lambda x: os.path.join(base_bone_dir,
                           'boneage-training-dataset',
                           '{}.png'.format(x)))

# NOTE: a batch preview used to be drawn at this point, but it read t_x, t_y,
# boneage_div and boneage_mean before any of them was assigned and therefore
# raised NameError.  The preview is drawn once the first training batch has
# been fetched (end of this section), so the premature copy was removed.

data = age_df
print('Scans found:', len(data), ', Total Headers', data.shape[0])

# Rows 11688..12610 are excluded up front.
# NOTE(review): presumably these scans are unavailable locally - confirm.
data = age_df.drop(age_df.index[11688:12611])

# Keep only the rows whose image file is actually on disk.
data['exists'] = data['path'].map(os.path.exists)
print(data['exists'].sum(), 'images found of', data.shape[0], 'total')
data = data[data['exists']].copy()  # copy: new columns are added below

data['gender'] = data['male'].map(lambda x: 'male' if x else 'female')

# Normalise the target: centre on the mean and scale by two standard
# deviations.  Both constants are reused later to convert model outputs
# back to months.
boneage_mean = data['boneage'].mean()
boneage_div = 2 * data['boneage'].std()
data['boneage_zscore'] = (data['boneage'] - boneage_mean) / boneage_div
data.dropna(inplace=True)
data[['boneage', 'male', 'boneage_zscore']].hist(figsize=(10, 5))

# ---------------------------------------------------------------------------
# Rebalance: 10 equal-width age bins x sex, 500 samples (with replacement)
# drawn from every group.
# ---------------------------------------------------------------------------
data['boneage_category'] = pd.cut(data['boneage'], 10)
new_age_df = data.groupby(['boneage_category', 'male']).apply(
    lambda x: x.sample(500, replace=True)
).reset_index(drop=True)
print('New Data Size:', new_age_df.shape[0], 'Old Size:', age_df.shape[0])
new_age_df[['boneage', 'male']].hist(figsize=(10, 5))

train_df, valid_df = train_test_split(new_age_df,
                                      test_size=0.25,
                                      random_state=2018,
                                      stratify=new_age_df['boneage_category'])
print('train', train_df.shape[0], 'validation', valid_df.shape[0])


def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    """Build a Keras directory iterator and re-point it at a DataFrame.

    An iterator is first created with ``flow_from_directory`` on the
    directory of the first path in ``in_df``; its file list, labels and
    sample counts are then overwritten with the DataFrame's contents so the
    iterator yields exactly the rows of ``in_df``.

    Args:
        img_data_gen: object providing ``flow_from_directory`` (an
            ``ImageDataGenerator`` in this script).
        in_df: DataFrame holding one row per image.
        path_col: name of the column with the full image paths.
        y_col: name of the column with the target values.
        **dflow_args: forwarded unchanged to ``flow_from_directory``.

    Returns:
        The patched iterator.
    """
    base_dir = os.path.dirname(in_df[path_col].values[0])
    print('## Ignore next message from keras, values are replaced anyways')
    df_gen = img_data_gen.flow_from_directory(base_dir,
                                              class_mode='sparse',
                                              **dflow_args)
    # Replace what the directory scan found with the DataFrame rows.
    df_gen.filenames = in_df[path_col].values
    df_gen.classes = np.stack(in_df[y_col].values)
    df_gen.samples = in_df.shape[0]
    df_gen.n = in_df.shape[0]
    df_gen._set_index_array()  # rebuild the index for the new sample count
    df_gen.directory = ''  # since we have the full path
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    return df_gen


# ---------------------------------------------------------------------------
# Image generators.
# ---------------------------------------------------------------------------
IMG_SIZE = (384, 384)
core_idg = ImageDataGenerator(samplewise_center=False,
                              samplewise_std_normalization=False,
                              horizontal_flip=True,
                              vertical_flip=False,
                              height_shift_range=0.15,
                              width_shift_range=0.15,
                              rotation_range=5,
                              shear_range=0.01,
                              fill_mode='nearest',
                              zoom_range=0.25,
                              preprocessing_function=preprocess_input)

train_gen = flow_from_dataframe(core_idg, train_df,
                                path_col='path',
                                y_col='boneage_zscore',
                                target_size=IMG_SIZE,
                                color_mode='rgb',
                                batch_size=32)

# we can use much larger batches for evaluation
valid_gen = flow_from_dataframe(core_idg, valid_df,
                                path_col='path',
                                y_col='boneage_zscore',
                                target_size=IMG_SIZE,
                                color_mode='rgb',
                                batch_size=256)

# used a fixed dataset for evaluating the algorithm
test_X, test_Y = next(flow_from_dataframe(core_idg, valid_df,
                                          path_col='path',
                                          y_col='boneage_zscore',
                                          target_size=IMG_SIZE,
                                          color_mode='rgb',
                                          batch_size=1024))
# NOTE(review): these rescaled copies are not used anywhere in the visible
# code, and dividing the z-scored labels by 255 looks unintended - confirm.
test_X1, test_Y1 = test_X / 255, test_Y / 255

# First training batch and the 4x4 axes grid used to preview it.
t_x, t_y = next(train_gen)
fig, m_axs = plt.subplots(4, 4, figsize=(16, 16))
# Bone-age regression, part 2: preview a batch, build the VGG16 + attention
# model, train it, and visualise attention maps and predictions.

# Preview the first training batch on the 4x4 grid (first channel only).
for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
    c_ax.imshow(c_x[:, :, 0], cmap='bone', vmin=-127, vmax=127)
    c_ax.set_title('%2.0f months' % (c_y * boneage_div + boneage_mean))
    c_ax.axis('off')

import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                             EarlyStopping, ReduceLROnPlateau)
from keras.layers import (GlobalAveragePooling2D, Dense, Dropout, Flatten,
                          Input, Conv2D, multiply, LocallyConnected2D, Lambda)
from keras.layers import BatchNormalization
from keras.metrics import mean_absolute_error
from keras.models import Model

# ---------------------------------------------------------------------------
# Model: frozen ImageNet VGG16 features -> attention mask -> masked GAP ->
# dense regression head.
# ---------------------------------------------------------------------------
in_lay = Input(t_x.shape[1:])
base_pretrained_model = VGG16(input_shape=t_x.shape[1:],
                              include_top=False,
                              weights='imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
bn_features = BatchNormalization()(pt_features)

# here we do an attention mechanism to turn pixels in the GAP on and off
attn_layer = Conv2D(64, kernel_size=(1, 1), padding='same',
                    activation='relu')(bn_features)
attn_layer = Conv2D(16, kernel_size=(1, 1), padding='same',
                    activation='relu')(attn_layer)
attn_layer = LocallyConnected2D(1, kernel_size=(1, 1), padding='valid',
                                activation='sigmoid')(attn_layer)

# fan it out to all of the channels (fixed all-ones 1x1 convolution)
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size=(1, 1), padding='same',
               activation='linear', use_bias=False, weights=[up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(1024, activation='elu')(gap_dr))
out_layer = Dense(1, activation='linear')(dr_steps)  # linear is what 16bit did
bone_age_model = Model(inputs=[in_lay], outputs=[out_layer])


def mae_months(in_gt, in_pred):
    """Return the mean absolute error after rescaling by ``boneage_div``.

    The targets are stored as (boneage - boneage_mean) / boneage_div, so
    multiplying both tensors by ``boneage_div`` reports the error in the
    units of the original ``boneage`` column (the mean offset cancels).

    Args:
        in_gt: ground-truth tensor in normalised units.
        in_pred: predicted tensor in normalised units.

    Returns:
        The result of ``keras.metrics.mean_absolute_error`` on the rescaled
        inputs.
    """
    return mean_absolute_error(boneage_div * in_gt, boneage_div * in_pred)


bone_age_model.compile(optimizer='adam', loss='mse', metrics=[mae_months])
bone_age_model.summary()

# ---------------------------------------------------------------------------
# Training.
# ---------------------------------------------------------------------------
weight_path = "{}_weights.best.hdf5".format('bone_age')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min',
                             save_weights_only=True)
# NOTE(review): newer Keras releases appear to name `epsilon` as `min_delta`;
# confirm against the installed version before renaming.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8,
                                   patience=10, verbose=1, mode='auto',
                                   epsilon=0.0001, cooldown=5, min_lr=0.0001)
# probably needs to be more patient, but kaggle time is limited
early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
callbacks_list = [checkpoint, early, reduceLROnPlat]

bone_age_model.fit_generator(train_gen,
                             validation_data=(test_X, test_Y),
                             epochs=6,
                             callbacks=callbacks_list)

# ---------------------------------------------------------------------------
# Attention maps.
# ---------------------------------------------------------------------------
# Pick the first layer with a 4-D, single-channel output; if none matches,
# the loop variable is left at the last layer of the model.
for attn_layer in bone_age_model.layers:
    c_shape = attn_layer.get_output_shape_at(0)
    if len(c_shape) == 4 and c_shape[-1] == 1:
        print(attn_layer)
        break

rand_idx = np.random.choice(range(len(test_X)), size=8)
attn_func = K.function(
    inputs=[bone_age_model.get_input_at(0), K.learning_phase()],
    outputs=[attn_layer.get_output_at(0)])

fig, m_axs = plt.subplots(len(rand_idx), 2, figsize=(8, 4 * len(rand_idx)))
for c_ax in m_axs.flatten():
    c_ax.axis('off')

for c_idx, (img_ax, attn_ax) in zip(rand_idx, m_axs):
    cur_img = test_X[c_idx:(c_idx + 1)]
    attn_img = attn_func([cur_img, 0])[0]  # 0 is passed as the learning phase
    img_ax.imshow(cur_img[0, :, :, 0], cmap='bone')
    attn_ax.imshow(attn_img[0, :, :, 0], cmap='viridis',
                   vmin=-0.1, vmax=0.5, interpolation='lanczos')
    real_age = boneage_div * test_Y[c_idx] + boneage_mean
    img_ax.set_title('Hand Image\nAge:%2.2fY' % (real_age / 12))
    pred_age = boneage_div * bone_age_model.predict(cur_img) + boneage_mean
    # predict() returns a (1, 1) array here (one image, Dense(1) output);
    # pull out the scalar instead of relying on implicit array-to-float
    # conversion during string formatting.
    attn_ax.set_title('Attention Map\nPred:%2.2fY' % (pred_age[0, 0] / 12))

fig.savefig('attention_map.png', dpi=300)

# ---------------------------------------------------------------------------
# Predictions versus ground truth on the fixed evaluation batch.
# ---------------------------------------------------------------------------
pred_Y = boneage_div * bone_age_model.predict(test_X, batch_size=32,
                                              verbose=True) + boneage_mean
test_Y_months = boneage_div * test_Y + boneage_mean

fig, ax1 = plt.subplots(1, 1, figsize=(6, 6))
ax1.plot(test_Y_months, pred_Y, 'r.', label='predictions')
ax1.plot(test_Y_months, test_Y_months, 'b-', label='actual')
ax1.legend()
ax1.set_xlabel('Actual Age (Months)')
ax1.set_ylabel('Predicted Age (Months)')

ord_idx = np.argsort(test_Y)
ord_idx = ord_idx[np.linspace(0, len(ord_idx) - 1, 8).astype(int)]  # take 8 evenly spaced ones
fig, m_axs = plt.subplots(4, 2, figsize=(16, 32))
for (idx, c_ax) in zip(ord_idx, m_axs.flatten()):
    c_ax.imshow(test_X[idx, :, :, 0], cmap='bone')
    # pred_Y has one column per sample; index it to get a plain scalar.
    c_ax.set_title('Age: %2.1f\nPredicted Age: %2.1f'
                   % (test_Y_months[idx], pred_Y[idx, 0]))
    c_ax.axis('off')

# Same panel for 8 randomly chosen evaluation samples.
rand_idx = np.random.choice(range(test_X.shape[0]), 8)
fig, m_axs = plt.subplots(4, 2, figsize=(16, 32))
for (idx, c_ax) in zip(rand_idx, m_axs.flatten()):
    c_ax.imshow(test_X[idx, :, :, 0], cmap='bone')
    c_ax.set_title('Age: %2.1f\nPredicted Age: %2.1f'
                   % (test_Y_months[idx], pred_Y[idx, 0]))
    c_ax.axis('off')