A few days ago the mixup paper (https://arxiv.org/abs/1710.09412) was posted to arXiv, so I tried it out on MNIST. The method itself is simple: during training, randomly pick two input/output pairs and blend them with a ratio λ. Written as formulas, if the two randomly chosen pairs are $(x_i, y_i)$ and $(x_j, y_j)$, the input $\tilde{x}$ and output $\tilde{y}$ actually fed to the network are

$$\tilde{x} = \lambda x_i + (1 - \lambda) x_j$$
$$\tilde{y} = \lambda y_i + (1 - \lambda) y_j$$

where λ is drawn at random from a beta distribution for each sample.
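As a minimal sketch of that mixing step (my own NumPy illustration, not code from the paper; mixup_pair and its arguments are hypothetical names, and the labels are assumed to be one-hot vectors):

import numpy as np

def mixup_pair(x1, y1, x2, y2, alpha=0.2):
    # Draw the mixing ratio lambda from Beta(alpha, alpha)
    lam = np.random.beta(alpha, alpha)
    # Blend inputs and labels with the same ratio
    x_mix = lam * x1 + (1.0 - lam) * x2
    y_mix = lam * y1 + (1.0 - lam) * y2
    return x_mix, y_mix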
Since this experiment uses MNIST, the blended samples look like two semi-transparent digits overlaid on each other. The figure below shows 32 samples generated with a beta distribution with α = β = 2.
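For reference, the λ values for such a batch could be sampled like this (a hypothetical snippet, assuming only NumPy); with α = β = 2 the beta density peaks at 0.5, so most samples end up as fairly even blends of two digits:

import numpy as np

lam = np.random.beta(2, 2, size=32)  # one lambda per mixed image
print(lam.round(2))                  # most values fall near 0.5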
The table below shows the results of training with α = β while varying α. Each run is only 30 epochs.
α | Train acc | Validation acc |
n/a | 0.999933 | 0.991600 |
0.1 | 0.999767 | 0.991800 |
0.2 | 0.999483 | 0.990600 |
0.3 | 0.999283 | 0.990300 |
0.4 | 0.998983 | 0.991600 |
0.5 | 0.998633 | 0.989500 |
1 | 0.997483 | 0.989700 |
2 | 0.996383 | 0.990000 |
The first row ("n/a") is the result of training the usual way, with no mixing. Train acc is computed on the unmixed training data. As α grows, samples are more likely to be blended close to half-and-half, and Train acc gradually drops. Validation acc also looks like it declines, although it is slightly higher at α = 0.1; the differences are so small that this may just be noise.
The benefits reported in the paper were not visible in this MNIST experiment, but it did at least confirm that the network can still be trained with this kind of blending.
The code used for the experiment is below. Change the value of eh.alpha in the __main__ block at the bottom of the script to change α.
# -*- coding: utf-8 -*-
import sys,os,math
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../keras-examples/src')

import numpy as np
import keras
from keras.models import Sequential, Model, Input, model_from_json
from keras.layers import Dense, Activation, Reshape, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.advanced_activations import LeakyReLU
from keras.utils import to_categorical
from util.history import ExperimentHistory
from keras.datasets import mnist
from PIL import Image, ImageDraw, ImageFont

TRAIN_EPOCH = 30
GENERATED_IMAGE_PATH = 'generated_images/'
FONT_DIR = "C:/Windows/Fonts"

# Image size is the same as MNIST
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28

def make_model():
    model = Sequential()
    model.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                     input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, 1),
                     data_format='channels_last'))
    model.add(LeakyReLU(0.2))
    model.add(Conv2D(128, (5, 5), strides=(2, 2), data_format='channels_last'))
    model.add(LeakyReLU(0.2))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(LeakyReLU(0.2))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    print(model.summary())
    return model

def combine_images(generated_images, margin=20):
    total = len(generated_images)
    cols = int(math.sqrt(total))
    rows = math.ceil(float(total)/cols)
    width, height = generated_images[0].shape[0:2]
    print(generated_images[0].shape)
    combined_image = np.ones(((height+margin)*rows, width*cols), dtype=generated_images[0].dtype)*-0.5
    for index, image in enumerate(generated_images):
        i = int(index/cols)
        j = index % cols
        combined_image[(height+margin)*i:(height+margin)*i+height, width*j:width*(j+1)] = image[:, :, 0]
    return combined_image, cols, rows, width, height, margin

# Data generator
def data_gen(x, y, batch_size, alpha, num_classes):
    # Generate data eternally
    assert len(x) == len(y)
    epoch = 0
    index = 0
    font = ImageFont.truetype(FONT_DIR+"/{}".format("Tahoma.ttf"), 10)  # For drawing numbers
    while True:
        index += 1
        data = []
        labels = []
        for i in range(batch_size):
            i1 = np.random.randint(0, len(x))
            i2 = np.random.randint(0, len(x))
            lam = np.random.beta(alpha, alpha)  # lambda ~ Beta(alpha, alpha)
            # Mixup two samples (same ratio for input and one-hot label)
            xm = lam * x[i1] + (1.0 - lam) * x[i2]
            ym = lam * y[i1] + (1.0 - lam) * y[i2]
            data.append(xm)
            labels.append(ym)

        ## Draw mixed-up images
        # image, cols, rows, width, height, margin = combine_images(data)
        # image = image*127.5 + 127.5
        # if not os.path.exists(GENERATED_IMAGE_PATH):
        #     os.mkdir(GENERATED_IMAGE_PATH)
        # img = Image.fromarray(image.astype(np.uint8))
        # draw = ImageDraw.Draw(img)
        # for i in range(len(data)):
        #     yi = int(i/cols)
        #     xi = i % cols
        # img.save(GENERATED_IMAGE_PATH+"%04d_%04d.png" % (epoch, index))

        yield np.array(data), np.array(labels).reshape((batch_size, num_classes))

def run(eh):
    # Prepare MNIST data (scale pixels to [-1, 1], add a channel axis, one-hot labels)
    (x_train, y_train), (x_val, y_val) = mnist.load_data()
    x_val = (x_val.astype(np.float32) - 127.5)/127.5
    x_val = x_val.reshape(x_val.shape[0], x_val.shape[1], x_val.shape[2], 1)
    y_val = keras.utils.to_categorical(y_val, num_classes=10)
    x_train = (x_train.astype(np.float32) - 127.5)/127.5
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
    y_train = keras.utils.to_categorical(y_train, num_classes=10)

    # Make a model
    opt = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999,
                                  epsilon=1e-08, decay=1e-4)
    model = make_model()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    # Train the model on mixed-up batches
    dg = data_gen(x_train, y_train, eh.batch_size, eh.alpha, 10)
    hist = model.fit_generator(dg, steps_per_epoch=len(x_train)//eh.batch_size, epochs=TRAIN_EPOCH,
                               validation_data=(x_val, y_val)).history

    # Evaluate the model by train and validation data
    score_train = model.evaluate(x_train, y_train, batch_size=eh.batch_size)
    score_val = model.evaluate(x_val, y_val, batch_size=eh.batch_size)
    comments = "#Final model loss_train={0:10.6f} acc_train={1:10.6f} loss_val={2:10.6f} acc_val={3:10.6f}"\
        .format(score_train[0], score_train[1], score_val[0], score_val[1])
    eh.write("history.log", model, opt, hist, comments)

if __name__ == '__main__':
    eh = ExperimentHistory()
    eh.batch_size = 32
    eh.alpha = 2
    run(eh)
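To sweep α as in the table above, the __main__ block could be replaced with a simple loop (a sketch reusing run and ExperimentHistory from the script; the "n/a" baseline still needs a separate run that trains on the raw data without the generator):

if __name__ == '__main__':
    # Run one training per alpha value; results are appended to history.log
    for alpha in [0.1, 0.2, 0.3, 0.4, 0.5, 1, 2]:
        eh = ExperimentHistory()
        eh.batch_size = 32
        eh.alpha = alpha
        run(eh)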