# Comparing Deep Learning Frameworks Through Their Implementations of Deep Residual Learning (ResNet)
Deep Residual Learning (ResNet) is a deep learning architecture, specifically a convolutional neural network design, published by Microsoft Research in 2015 that makes it possible to train very deep networks to high accuracy. Trained on images with 152 layers, it achieved accuracy surpassing human-level performance. This article presents ResNet implementations in Chainer, Keras, TensorFlow, Caffe, and Torch. Seeing how each framework describes a large network like ResNet should give you a rough feel for each framework's flexibility.
## What is ResNet?
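ResNet's key idea is the shortcut connection. Rather than asking a stack of layers to learn a target mapping H(x) directly, each block learns only the residual F(x) = H(x) - x and outputs F(x) + x. The identity path lets gradients flow through unchanged, which is what makes networks over a hundred layers deep trainable. Every implementation below expresses this same pattern: `h + x` in Chainer, `merge(..., mode="sum")` in Keras, `conv2 + input_layer` in TensorFlow, `nn.CAddTable()` in Torch, and `L.Eltwise(..., P.Eltwise.SUM)` in Caffe. As a framework-neutral illustration, here is a minimal NumPy sketch of one basic block (the `conv1`/`conv2` callables are hypothetical stand-ins for the 3x3 convolution + batch-norm pairs in the real implementations):

```python
import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

def residual_block(x, conv1, conv2):
    """One basic residual block: out = relu(F(x) + x).

    conv1/conv2 are stand-in callables; any shape-preserving transforms
    work here, which is exactly why the identity shortcut applies.
    """
    h = relu(conv1(x))   # first conv + nonlinearity
    h = conv2(h)         # second conv (no ReLU yet)
    return relu(h + x)   # add the shortcut, then the final ReLU

# Toy usage with scaling "convolutions", just to show the data flow:
x = np.random.randn(1, 16, 8, 8).astype(np.float32)
out = residual_block(x, conv1=lambda v: 0.5 * v, conv2=lambda v: 0.5 * v)
print(out.shape)  # (1, 16, 8, 8): same shape as the input
```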
## Chainer
- Lines of code: 67
- Source: https://github.com/mitmul/chainer-cifar10/blob/master/models/ResNet.py
- Getting started: Japanese translation of the Chainer tutorial (introduction to Chainer and multilayer perceptrons), from the blog 俺とプログラミング
```python
import chainer
import chainer.functions as F
import chainer.links as L
import math


class ResBlock(chainer.Chain):

    def __init__(self, n_in, n_out, stride=1, ksize=1):
        w = math.sqrt(2)
        super(ResBlock, self).__init__(
            conv1=L.Convolution2D(n_in, n_out, 3, stride, 1, w),
            bn1=L.BatchNormalization(n_out),
            conv2=L.Convolution2D(n_out, n_out, 3, 1, 1, w),
            bn2=L.BatchNormalization(n_out),
        )

    def __call__(self, x, train):
        h = F.relu(self.bn1(self.conv1(x), test=not train))
        h = self.bn2(self.conv2(h), test=not train)
        if x.data.shape != h.data.shape:
            xp = chainer.cuda.get_array_module(x.data)
            n, c, hh, ww = x.data.shape
            pad_c = h.data.shape[1] - c
            p = xp.zeros((n, pad_c, hh, ww), dtype=xp.float32)
            p = chainer.Variable(p, volatile=not train)
            x = F.concat((p, x))
            if x.data.shape[2:] != h.data.shape[2:]:
                x = F.average_pooling_2d(x, 1, 2)
        return F.relu(h + x)


class ResNet(chainer.Chain):

    def __init__(self, block_class, n=18):
        super(ResNet, self).__init__()
        w = math.sqrt(2)
        links = [('conv1', L.Convolution2D(3, 16, 3, 1, 0, w))]
        links += [('bn1', L.BatchNormalization(16))]
        for i in range(n):
            links += [('res{}'.format(len(links)),
                       block_class(16, 16))]
        for i in range(n):
            links += [('res{}'.format(len(links)),
                       block_class(32 if i > 0 else 16, 32,
                                   1 if i > 0 else 2))]
        for i in range(n):
            links += [('res{}'.format(len(links)),
                       block_class(64 if i > 0 else 32, 64,
                                   1 if i > 0 else 2))]
        links += [('_apool{}'.format(len(links)),
                   F.AveragePooling2D(6, 1, 0, False, True))]
        links += [('fc{}'.format(len(links)),
                   L.Linear(64, 10))]

        for link in links:
            if not link[0].startswith('_'):
                self.add_link(*link)

        self.forward = links
        self.train = True

    def __call__(self, x, t):
        for name, f in self.forward:
            if 'res' in name:
                x = f(x, self.train)
            else:
                x = f(x)
        if self.train:
            self.loss = F.softmax_cross_entropy(x, t)
            self.accuracy = F.accuracy(x, t)
            return self.loss
        else:
            return x


model = ResNet(ResBlock)
```
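Since the Chainer model is defined imperatively, driving it is just ordinary Python. A hypothetical, untested sketch (assumes the Chainer v1-era API the listing uses, where `train`/`test` flags are passed around by hand):

```python
# Hypothetical usage sketch for the listing above (Chainer v1-era API).
from chainer import optimizers

# n=18 gives the 110-layer (6n+2) CIFAR-10 variant built by ResNet.__init__.
model = ResNet(ResBlock, n=18)
optimizer = optimizers.MomentumSGD(lr=0.1)
optimizer.setup(model)

model.train = True    # __call__(x, t) then returns the softmax cross-entropy loss
model.train = False   # __call__(x, t) instead returns the raw network output
```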
## Keras
- Lines of code: 72
- Source: https://github.com/raghakot/keras-resnet
- Getting started: Keras Documentation
```python
from keras.models import Model
from keras.layers import Input, Activation, merge, Dense, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils.visualize_util import plot


def _conv_bn_relu(nb_filter, nb_row, nb_col, subsample=(1, 1)):
    def f(input):
        conv = Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col,
                             subsample=subsample, init="he_normal",
                             border_mode="same")(input)
        norm = BatchNormalization(mode=0, axis=1)(conv)
        return Activation("relu")(norm)
    return f


def _bn_relu_conv(nb_filter, nb_row, nb_col, subsample=(1, 1)):
    def f(input):
        norm = BatchNormalization(mode=0, axis=1)(input)
        activation = Activation("relu")(norm)
        return Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col,
                             subsample=subsample, init="he_normal",
                             border_mode="same")(activation)
    return f


def _basic_block(nb_filters, init_subsample=(1, 1)):
    def f(input):
        conv1 = _bn_relu_conv(nb_filters, 3, 3, subsample=init_subsample)(input)
        residual = _bn_relu_conv(nb_filters, 3, 3)(conv1)
        return _shortcut(input, residual)
    return f


def _shortcut(input, residual):
    stride_width = input._keras_shape[2] / residual._keras_shape[2]
    stride_height = input._keras_shape[3] / residual._keras_shape[3]
    equal_channels = residual._keras_shape[1] == input._keras_shape[1]

    shortcut = input
    # 1x1 projection if the shapes differ, identity otherwise
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Convolution2D(nb_filter=residual._keras_shape[1],
                                 nb_row=1, nb_col=1,
                                 subsample=(stride_width, stride_height),
                                 init="he_normal", border_mode="valid")(input)

    return merge([shortcut, residual], mode="sum")


def _residual_block(block_function, nb_filters, repetations, is_first_layer=False):
    def f(input):
        for i in range(repetations):
            init_subsample = (1, 1)
            if i == 0 and not is_first_layer:
                init_subsample = (2, 2)
            input = block_function(nb_filters=nb_filters,
                                   init_subsample=init_subsample)(input)
        return input
    return f


def resnet():
    input = Input(shape=(3, 224, 224))

    conv1 = _conv_bn_relu(nb_filter=64, nb_row=7, nb_col=7, subsample=(2, 2))(input)
    pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), border_mode="same")(conv1)

    # Build residual blocks..
    block_fn = _basic_block
    block1 = _residual_block(block_fn, nb_filters=64, repetations=3, is_first_layer=True)(pool1)
    block2 = _residual_block(block_fn, nb_filters=128, repetations=4)(block1)
    block3 = _residual_block(block_fn, nb_filters=256, repetations=6)(block2)
    block4 = _residual_block(block_fn, nb_filters=512, repetations=3)(block3)

    # Classifier block
    pool2 = AveragePooling2D(pool_size=(7, 7), strides=(1, 1), border_mode="same")(block4)
    flatten1 = Flatten()(pool2)
    dense = Dense(output_dim=1000, init="he_normal", activation="softmax")(flatten1)

    model = Model(input=input, output=dense)
    return model
```
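As a quick sanity check, the functional-API model above compiles like any other Keras model. A hypothetical sketch (Keras 1.x API under Python 2, with Theano-style channels-first ordering, which the `axis=1` batch normalization and `(3, 224, 224)` input shape assume):

```python
# Hypothetical usage sketch for the listing above (Keras 1.x API assumed).
model = resnet()            # the ResNet-34 variant: 3+4+6+3 basic blocks
model.compile(optimizer="sgd", loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()             # layer-by-layer shapes and parameter counts

# plot() from keras.utils.visualize_util (imported above) can also render
# the graph to an image: plot(model, to_file="resnet.png")
```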
## TensorFlow
- Lines of code: 105
- Source: https://github.com/xuyuwei/resnet-tf
- Getting started: "TensorFlow tutorial: Deep MNIST for Experts" (Japanese translation), on Qiita
```python
import numpy as np
import tensorflow as tf


def weight_variable(shape, name=None):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)


def softmax_layer(inpt, shape):
    fc_w = weight_variable(shape)
    fc_b = tf.Variable(tf.zeros([shape[1]]))
    fc_h = tf.nn.softmax(tf.matmul(inpt, fc_w) + fc_b)
    return fc_h


def conv_layer(inpt, filter_shape, stride):
    out_channels = filter_shape[3]

    filter_ = weight_variable(filter_shape)
    conv = tf.nn.conv2d(inpt, filter=filter_, strides=[1, stride, stride, 1], padding="SAME")
    mean, var = tf.nn.moments(conv, axes=[0, 1, 2])
    beta = tf.Variable(tf.zeros([out_channels]), name="beta")
    gamma = weight_variable([out_channels], name="gamma")

    batch_norm = tf.nn.batch_norm_with_global_normalization(
        conv, mean, var, beta, gamma, 0.001,
        scale_after_normalization=True)

    out = tf.nn.relu(batch_norm)
    return out


def residual_block(inpt, output_depth, down_sample, projection=False):
    input_depth = inpt.get_shape().as_list()[3]
    if down_sample:
        filter_ = [1, 2, 2, 1]
        inpt = tf.nn.max_pool(inpt, ksize=filter_, strides=filter_, padding='SAME')

    conv1 = conv_layer(inpt, [3, 3, input_depth, output_depth], 1)
    conv2 = conv_layer(conv1, [3, 3, output_depth, output_depth], 1)

    if input_depth != output_depth:
        if projection:
            # Option B: Projection shortcut
            input_layer = conv_layer(inpt, [1, 1, input_depth, output_depth], 2)
        else:
            # Option A: Zero-padding
            input_layer = tf.pad(inpt, [[0, 0], [0, 0], [0, 0], [0, output_depth - input_depth]])
    else:
        input_layer = inpt

    res = conv2 + input_layer
    return res


n_dict = {20: 1, 32: 2, 44: 3, 56: 4}

def resnet(inpt, n):
    if n < 20 or (n - 20) % 12 != 0:
        print("ResNet depth invalid.")
        return

    num_conv = (n - 20) // 12 + 1  # floor division also works under Python 3
    layers = []

    with tf.variable_scope('conv1'):
        conv1 = conv_layer(inpt, [3, 3, 3, 16], 1)
        layers.append(conv1)

    for i in range(num_conv):
        with tf.variable_scope('conv2_%d' % (i + 1)):
            conv2_x = residual_block(layers[-1], 16, False)
            conv2 = residual_block(conv2_x, 16, False)
            layers.append(conv2_x)
            layers.append(conv2)

        assert conv2.get_shape().as_list()[1:] == [32, 32, 16]

    for i in range(num_conv):
        down_sample = True if i == 0 else False
        with tf.variable_scope('conv3_%d' % (i + 1)):
            conv3_x = residual_block(layers[-1], 32, down_sample)
            conv3 = residual_block(conv3_x, 32, False)
            layers.append(conv3_x)
            layers.append(conv3)

        assert conv3.get_shape().as_list()[1:] == [16, 16, 32]

    for i in range(num_conv):
        down_sample = True if i == 0 else False
        with tf.variable_scope('conv4_%d' % (i + 1)):
            conv4_x = residual_block(layers[-1], 64, down_sample)
            conv4 = residual_block(conv4_x, 64, False)
            layers.append(conv4_x)
            layers.append(conv4)

        assert conv4.get_shape().as_list()[1:] == [8, 8, 64]

    with tf.variable_scope('fc'):
        global_pool = tf.reduce_mean(layers[-1], [1, 2])
        assert global_pool.get_shape().as_list()[1:] == [64]

        out = softmax_layer(global_pool, [64, 10])
        layers.append(out)

    return layers[-1]
```
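A hypothetical sketch of driving the graph above (TensorFlow 0.x-era session API, which the `batch_norm_with_global_normalization` call dates the code to; the asserts require 32x32 CIFAR-10-sized input):

```python
# Hypothetical usage sketch for the listing above (TF 0.x-era graph API).
import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 32, 32, 3])
probs = resnet(images, 20)  # softmax_layer returns class probabilities

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())  # old-style initializer
    out = sess.run(probs, feed_dict={images: np.zeros((2, 32, 32, 3), np.float32)})
    print(out.shape)  # (2, 10)
```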
## Torch
The Torch implementation is split across two files: `residual.lua` below defines the convolution and residual building blocks, and a second file assembles them into a model (here sized for 28x28 MNIST-style input).

```lua
require 'nn'
require 'cunn'

local function convunit(net, fin, fout, fsize, str, pad, nobatch)
    local nobatch = nobatch or false
    local pad = pad or 1
    local str = str or 1
    local fsize = fsize or 3
    net:add(nn.SpatialConvolution(fin, fout, fsize, fsize, str, str, pad, pad))
    if (nobatch == false) then
        net:add(nn.SpatialBatchNormalization(fout))
    end
    net:add(nn.ReLU(true))
end

local function convunit31(net, fin, half, str, nobatch)
    local str = str or 3
    local half = half or false
    if (half) then
        convunit(net, fin, 2 * fin, str, 2, nil, nobatch)
    else
        convunit(net, fin, fin, str, 1, nil, nobatch)
    end
end

local function convunit2(net, fin, half)
    local half = half or false
    convunit31(net, fin, half, nil, true)
    if (half) then
        convunit31(net, 2 * fin)
    else
        convunit31(net, fin)
    end
end

local function resUnit(net, unit, fin, half)
    local half = half or false
    local net = net or nn.Sequential()
    local cat = nn.ConcatTable()
    cat:add(unit)
    if (half == false) then
        cat:add(nn.Identity())
    else
        cat:add(nn.SpatialConvolution(fin, 2 * fin, 1, 1, 2, 2))
    end
    net:add(cat)
    net:add(nn.CAddTable())
    net:add(nn.ReLU(true))
    return net
end

local function rconvunit2(net, fin, half)
    local unit = nn.Sequential()
    convunit2(unit, fin, half)
    resUnit(net, unit, fin, half)
end

local function rconvunitN(net, fin, N)
    local N = N or 0
    for i = 1, N do
        rconvunit2(net, fin)
    end
end

local res = {}
res.convunit = convunit
res.rconvunit2 = rconvunit2
res.rconvunitN = rconvunitN
return res
```
The model file that `require`s the module above:

```lua
require 'nn'

local model = {}
local res = require 'residual'

function model.residual(N)
    local N = N or 15
    local half = true
    net = nn.Sequential()
    net:add(nn.Reshape(1, 28, 28))
    res.convunit(net, 1, 64)
    res.rconvunitN(net, 64, N)
    res.rconvunit2(net, 64, half)
    res.rconvunitN(net, 128, N)
    res.rconvunit2(net, 128, half)
    res.rconvunitN(net, 256, N)
    res.rconvunit2(net, 256, half)
    cls = nn.Sequential()
    local wid = 4
    cls:add(nn.Reshape(512 * wid * wid))
    cls:add(nn.Linear(512 * wid * wid, 10))
    cls:add(nn.LogSoftMax())
    net:add(cls)
    local ct = nn.ClassNLLCriterion()
    require 'cunn'
    net = net:cuda()
    ct = ct:cuda()
    return net, ct
end

return model
```
## Caffe
- Lines of code: 99 (plus a 4,548-line prototxt)
- Source (code): https://github.com/happynear/caffe-windows/blob/master/examples/pycaffe/resnet.py
- Source (prototxt): https://github.com/KaimingHe/deep-residual-networks/blob/master/prototxt/ResNet-101-deploy.prototxt
- Getting started: caffe.md · GitHub
```python
from __future__ import print_function
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe

# helper functions for common structures
def conv_factory(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, bias_term=False, weight_filler=dict(type='msra'))
    batch_norm = L.BatchNorm(conv, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    return scale

def conv_factory_relu(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, bias_term=False, weight_filler=dict(type='msra'))
    batch_norm = L.BatchNorm(conv, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    return relu

def conv_factory_relu_inverse(bottom, ks, nout, stride=1, pad=0):
    batch_norm = L.BatchNorm(bottom, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, weight_filler=dict(type='msra'))
    return conv

def conv_factory_relu_inverse_no_inplace(bottom, ks, nout, stride=1, pad=0):
    batch_norm = L.BatchNorm(bottom, in_place=False,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, weight_filler=dict(type='msra'))
    return conv

def residual_factory1(bottom, num_filter):
    conv1 = conv_factory_relu_inverse_no_inplace(bottom, 3, num_filter, 1, 1)
    conv2 = conv_factory_relu_inverse(conv1, 3, num_filter, 1, 1)
    addition = L.Eltwise(bottom, conv2, operation=P.Eltwise.SUM)
    return addition

def residual_factory_proj(bottom, num_filter, stride=2):
    batch_norm = L.BatchNorm(bottom, in_place=True,
                             param=[dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0),
                                    dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True)
    conv1 = conv_factory_relu(scale, 3, num_filter, stride, 1)
    conv2 = L.Convolution(conv1, kernel_size=3, stride=1, num_output=num_filter,
                          pad=1, weight_filler=dict(type='msra'))
    proj = L.Convolution(scale, kernel_size=1, stride=stride, num_output=num_filter,
                         pad=0, weight_filler=dict(type='msra'))
    addition = L.Eltwise(conv2, proj, operation=P.Eltwise.SUM)
    return addition

def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

def resnet(train_lmdb, test_lmdb, batch_size=256, stages=[2, 2, 2, 2],
           input_size=128, first_output=32, include_acc=False):
    # now, this code can't recognize include phase, so there will only be a TEST phase data layer
    data, label = L.Data(source=train_lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TRAIN')))
    data, label = L.Data(source=test_lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True),
                         include=dict(phase=getattr(caffe_pb2, 'TEST')))
    data, label = L.MemoryData(batch_size=batch_size, height=input_size, width=input_size,
                               channels=3, ntop=2,
                               transform_param=dict(mean_value=[104, 117, 123], mirror=True),
                               include=dict(phase=getattr(caffe_pb2, 'TEST')))

    # the net itself
    relu1 = conv_factory_relu(data, 3, first_output, stride=1, pad=1)
    relu2 = conv_factory_relu(relu1, 3, first_output, stride=1, pad=1)
    residual = max_pool(relu2, 3, stride=2)

    for i in stages[1:]:
        first_output *= 2
        for j in range(i):
            if j == 0:
                # note: i is the stage's repeat count (always >= 2 here), so
                # the stride-1 branch below never fires with these stages
                if i == 0:
                    residual = residual_factory_proj(residual, first_output, 1)
                else:
                    residual = residual_factory_proj(residual, first_output, 2)
            else:
                residual = residual_factory1(residual, first_output)

    glb_pool = L.Pooling(residual, pool=P.Pooling.AVE, global_pooling=True)
    fc = L.InnerProduct(glb_pool, num_output=1000)
    loss = L.SoftmaxWithLoss(fc, label)
    acc = L.Accuracy(fc, label, include=dict(phase=getattr(caffe_pb2, 'TEST')))
    return to_proto(loss, acc)

def make_net():
    with open('residual_train_test.prototxt', 'w') as f:
        print(resnet('/path/to/caffe-train-lmdb', '/path/to/caffe-val-lmdb'), file=f)

if __name__ == '__main__':
    make_net()
    caffe.Net('residual_train_test.prototxt', caffe.TEST)  # test loading the net
```
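Running the script writes `residual_train_test.prototxt` and then reloads it as a smoke test. A hypothetical follow-up sketch for inspecting the generated net from pycaffe (blob names are whatever the proto generator assigned):

```python
# Hypothetical inspection sketch (pycaffe API): list each blob's shape
# in the generated net to sanity-check the architecture.
import caffe

net = caffe.Net('residual_train_test.prototxt', caffe.TEST)
for name, blob in net.blobs.items():
    print(name, blob.data.shape)
```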