In an ordinary CNN, making the model deeper does not keep the training performance level or improve it; it actually gets worse. ResNet lets the model keep getting deeper while still performing well. The idea is to skip over one or more units and take a shortcut directly to a later unit.
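As a minimal sketch of that idea in Keras (hypothetical filter count, not the competition model): the block's input bypasses two convolutions and is added back to their output before the final activation.

from keras.layers import Conv2D, Activation, add

def residual_block(x, filters=16):
    # main path: two 3x3 convolutions; padding='same' keeps the spatial size
    y = Conv2D(filters, 3, padding='same', activation='relu')(x)
    y = Conv2D(filters, 3, padding='same')(y)
    # shortcut: the block input skips both convolutions and is added back,
    # then the sum goes through ReLU
    return Activation('relu')(add([x, y]))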
For this competition I adapted the CIFAR-10 ResNet model, and the predictions came out reasonably well: top-3 accuracy above 90%. Below is my understanding of the code. The competition entry used the first kind of ResNet (v1); I'll try the second kind (v2) when I get the chance.
1. Imports
from __future__ import print_function
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras.datasets import cifar10
import numpy as np
import os
import functools

2. Parameter definitions. num_classes is changed to 20 for this competition's 20-way classification; I don't yet fully understand why subtract_pixel_mean helps. n is the number of residual blocks per stage, and each block contains two conv layers, so depth = number of stages * blocks per stage * conv layers per block + 2; with n = 3 that is 3 * 3 * 2 + 2 = 20. The dividing line between stages is the layer where the feature map shrinks while the channel count increases, which plays the same role as a max-pooling layer in an ordinary model, and that is why depth is computed this way (my own understanding).
# Training parameters
batch_size = 32  # orig paper trained all networks with batch_size=128
epochs = 100
data_augmentation = True
num_classes = 20
img_row = 224
img_col = 224

# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

n = 3

# Model version
# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
version = 1

# Computed depth from supplied model parameter n
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

3. Data initialization.
# Load the competition data (replaces the CIFAR-10 loading in the original example).
x_train = np.load("image1.npy")
y_train = np.load("classes1.npy")
x_train = x_train.reshape(x_train.shape[0], img_row, img_col, 1)
x_test = x_train[12000:]
y_test = y_train[12000:]
x_train = x_train[0:12000]
y_train = y_train[0:12000]

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

4. Learning rate schedule: the learning rate is reduced as the epoch count increases.
def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

5. A single ResNet layer. Throughout the network kernel_size is 3, and padding='same' keeps the feature map the same size after every convolution, so the two tensors added on the shortcut path have matching shapes. The model uses no dropout, but L2 regularization is applied to every convolution.
def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    """2D Convolution-Batch Normalization-Activation stack builder

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or
            activation-bn-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x

6. The residual-block builder. One block contains two resnet_layer calls, i.e. two conv layers. The outer for loop runs over stages; each stage after the first shrinks the feature map and doubles the channel count. The inner for loop adds the n residual blocks of each stage. We start from the initial x, compute a layer y, then run y through a second convolution with no activation; x and this second y are added, and only then is ReLU applied, and this repeats n times before moving on to stack = 1. For example, with a 224*224*1 input, stack = 0 produces 224*224*16. At that point the feature map needs to be downsampled; instead of max pooling, the first convolution of stack = 1 uses strides = 2, giving a y of 112*112*32, and every convolution after it goes back to strides = 1. But x is still 224*224*16, so it is projected through a resnet_layer with strides = 2 and 32 filters, this time with no activation and no batch normalization; the projected x then matches the second (unactivated) y at 112*112*32, the two are added, ReLU is applied, and from there everything proceeds as before at 112*112*32 until stack = 2, where the map shrinks again to 56*56*64 until the loop ends. Finally an AveragePooling2D is applied, the result is flattened, and a softmax layer does the classification.

def resnet_v1(input_shape, depth, num_classes=20):
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model

7. The above is the core of the model; most of what follows has already been covered. The callback functions here are documented on the Keras website; they keep the saved weights up to date, writing the model only when an epoch improves on the best result so far. I did not use this part and simply saved the final model instead, which in hindsight seems a bit questionable. I'll think it through more carefully the next time I need it.
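One thing the excerpt never shows is the model actually being built; before the compile call below it was presumably instantiated from resnet_v1, roughly like this (a sketch reusing the variables defined above; the shape comment is my own reading of the stacking logic):

# num_classes defaults to 20 inside resnet_v1 above
model = resnet_v1(input_shape=input_shape, depth=depth)
# for a 224*224*1 input and depth = 20, the last residual stage should end at
# 56*56*64 before the 8x8 average pooling and the 20-way softmax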
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()
print(model_type)

# Prepare model saving directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'hw_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

'''
# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]
'''

8. Data augmentation and model saving, followed by the result on the test set.
# Run training, with or without data augmentation.
# (the checkpoint / lr callbacks above are commented out, so none are passed here)
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # randomly flip images
        horizontal_flip=True,
        # randomly flip images
        vertical_flip=False)

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        verbose=1,
                        workers=4)

model.save('model.h5')

# Score trained model.
#scores = model.evaluate(x_test, y_test, verbose=1)
#print('Test loss:', scores[0])
#print('Test accuracy:', scores[1])
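Since the competition metric was top-3 accuracy (mentioned at the top), this is roughly how the saved model could be scored on the held-out split; the snippet below is my own addition, not part of the original script:

import numpy as np
from keras.models import load_model

model = load_model('model.h5')
probs = model.predict(x_test, batch_size=batch_size)

# top-3 accuracy: count a sample as correct if the true class is among the
# three highest-probability predictions
top3 = np.argsort(probs, axis=1)[:, -3:]
true_labels = np.argmax(y_test, axis=1)  # y_test was one-hot encoded earlier
top3_acc = np.mean([t in row for t, row in zip(true_labels, top3)])
print('Top-3 accuracy: %.4f' % top3_acc)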