
[Machine Learning] Kaggle Digit Recognizer Code (TensorFlow + CNN), Reworked

Today I read the CNN chapter of Hands-On Machine Learning with Scikit-Learn & TensorFlow, which cleared up some implementation details for me, so I reimplemented my solution to Kaggle's Digit Recognizer problem with a LeNet-5-style network. Training uses mini-batches of 100 rows; after all the training data has been fed once, the data is shuffled and iteration continues. About 3,500 iterations take 332 s, giving an accuracy of 0.959961 on the training set (90% of the train.csv data) and 0.96619 on the validation set (the remaining 10% of train.csv). Accuracy keeps climbing slowly with more iterations: after roughly 15 minutes of training, validation accuracy reaches about 0.991, which is probably close to this model's limit. When I have time I'll tune the hyperparameters to see whether there is room for improvement.
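
The batching scheme in isolation, before the full listing below: walk through the training set in fixed-size slices, and once a slice would run past the end, reshuffle the whole set and start over. A minimal numpy-only sketch of that loop (illustrative toy data; the real version is the train() method further down):

import numpy as np

x = np.arange(12).reshape(6, 2)   # toy features
y = np.arange(6)                  # toy labels
batch_size, start = 2, 0
for step in range(10):
    batch_x, batch_y = x[start:start+batch_size], y[start:start+batch_size]
    # ... one optimizer step on (batch_x, batch_y) would go here ...
    start += batch_size
    if start + batch_size > x.shape[0]:
        order = np.random.permutation(x.shape[0])   # reshuffle after a full pass
        x, y = x[order], y[order]
        start = 0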

Model architecture:

LeNet-5 Architecture (as implemented: the original table said "avg pool" and a 3*3 kernel for C3, but the code below uses max pooling and 5*5 kernels)
layer   operation        feature-maps    kernel  stride  size     activation
in      input            1 (gray image)  -       -       28*28    -
C1      convolution      16              5*5     1       28*28    relu
S2      max pool         16              2*2     2       14*14    -
C3      convolution      32              5*5     1       14*14    relu
S4      max pool         32              2*2     2       7*7      -
F5      fully connected  -               -       -       256      relu
out     fully connected  -               -       -       10       -
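
With SAME padding, a layer's output spatial size is ceil(input_size / stride): the 28*28 input keeps its size through the stride-1 convolutions and halves at each 2*2, stride-2 pooling layer. A quick sanity check of the sizes in the table (plain Python, separate from the model code):

import math

size = 28
for layer, stride in [('C1', 1), ('S2', 2), ('C3', 1), ('S4', 2)]:
    size = int(math.ceil(size / float(stride)))
    print("%s %d" % (layer, size))   # C1 28, S2 14, C3 14, S4 7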

Sample output:


epoch 3200, training accuracy 0.961914, validate accuracy 0.96881
epoch 3300, training accuracy 0.959961, validate accuracy 0.96619
training done
total training time: 332.004180908s

#!/usr/bin/python2
# coding: utf-8

import tensorflow as tf
import pandas as pd
import numpy as np
import time

class DigitsModelCNN(object):
    def __init__(self):
        self.train_input = tf.placeholder(tf.float32, shape=[None,784])
        self.train_out = tf.placeholder(tf.float32, shape=[None,10])
        self.keep_prob = tf.placeholder(tf.float32)
        self.sess = tf.Session()

        # mini-batch of 100 rows; 210 * 16 = 3360 training iterations
        self.batch_size = 100
        self.epochs = 210*16
        self.learn_rate = 5e-4

    '''
    @func       Computes a 2-D convolution given 4-D input and filter tensors.
    @param      input   4-D input tensor of shape [batch, in_height, in_width, in_channels]
                filter  4-D filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels]
    @return     4-D output tensor of shape [batch, out_height, out_width, out_channels]
    '''
    def conv2d(self, input, filter, stride_h=1, stride_w=1):
        # NHWC layout: strides are [batch, height, width, channels]
        return tf.nn.conv2d(input, filter, strides=[1,stride_h,stride_w,1], padding='SAME')

    '''
    @func       Performs the max pooling on the input.
    @param      input   4-D Tensor with shape [batch, height, width, channels] and type tf.float32
                ksize   A list of ints that has length >= 4. The size of the window for each dimension of the input tensor.
                strides A list of ints that has length >= 4. The stride of the sliding window for each dimension of the input tensor
    @return     4-D tensor of the max-pooled input
    '''
    def max_pool_2x2(self, input, stride_h=2, stride_w=2):
        return tf.nn.max_pool(input, ksize=[1,2,2,1], strides=[1,stride_h,stride_w,1], padding="SAME")

    '''
    @func       outputs random values from a truncated normal distribution.
    '''
    def init_w(self,shape):
        # the standard deviation is 0.1
        value = tf.truncated_normal(shape=shape, stddev=0.1)
        return tf.Variable(value)

    '''
    @func       initializes the bias as a small positive constant (0.1)
    '''
    def init_b(self,shape):
        value = tf.constant(0.1, shape=shape)
        return tf.Variable(value)

    '''
    @note LeNet-5-style architecture as implemented below
            layer   operation        feature-maps    kernel  stride  size     activation
            in      input            1 (gray image)  -       -       28*28    -
            C1      convolution      16              5*5     1       28*28    relu
            S2      max pool         16              2*2     2       14*14    -
            C3      convolution      32              5*5     1       14*14    relu
            S4      max pool         32              2*2     2       7*7      -
            F5      fully connected  -               -       -       256      relu
            out     fully connected  -               -       -       10       -
    '''
    def build(self):
        # self.train_input is already defined in __init__; reshape the flat
        # 784-pixel rows into 28x28 single-channel images
        self.input = tf.reshape(self.train_input, [-1, 28, 28, 1])
        self.f_c1 = self.init_w([5,5,1,16])
        self.b_c1 = self.init_b([16])
        self.c1 = tf.nn.relu(self.conv2d(self.input, self.f_c1) + self.b_c1)
        self.s2 = self.max_pool_2x2(self.c1)

        self.f_c3 = self.init_w([5,5,16,32])
        self.b_c3 = self.init_b([32])
        self.c3 = tf.nn.relu(self.conv2d(self.s2, self.f_c3) + self.b_c3)
        self.s4 = self.max_pool_2x2(self.c3)

        self.w_f5 = self.init_w([7*7*32, 256])
        self.b_f5 = self.init_b([256])
        self.x_f5 = tf.reshape(self.s4, [-1,7*7*32])
        self.f5 = tf.nn.relu(tf.matmul(self.x_f5, self.w_f5) + self.b_f5)

        # out@10: keep the raw logits; softmax_cross_entropy_with_logits applies
        # softmax internally, so it must be fed logits, not softmaxed outputs
        self.f5_drop = tf.nn.dropout(self.f5, self.keep_prob)
        self.w_out = self.init_w([256,10])
        self.b_out = self.init_b([10])
        self.logits = tf.matmul(self.f5_drop, self.w_out) + self.b_out
        self.out = tf.nn.softmax(self.logits)

        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.train_out))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learn_rate).minimize(self.loss)

        predict = tf.equal(tf.argmax(self.out,1), tf.argmax(self.train_out,1))
        self.accuracy = tf.reduce_mean(tf.cast(predict, tf.float32))


    def train(self, train_x, train_y, test_x, test_y, keep_prob=0.1):
        # keep_prob is the dropout keep probability: 0.1 keeps only 10% of F5 units during training
        print("start training")

        self.sess.run(tf.global_variables_initializer())

        batch_start = 0
        batch_end = batch_start + self.batch_size

        print(self.train_input.shape)
        print(self.train_out.shape)

        for epoch in range(self.epochs):
            _, loss, prob = self.sess.run([self.optimizer, self.loss, self.out],feed_dict={
                self.train_input :  train_x[batch_start:batch_end],
                self.train_out:     train_y[batch_start:batch_end],
                self.keep_prob :    keep_prob
            })

            if epoch %100 == 0:
                train_accuracy = self.sess.run(self.accuracy, feed_dict={
                    self.train_input:   train_x[0:1024],
                    self.train_out:     train_y[0:1024],
                    self.keep_prob:     1.0
                })
                validate_accuracy = self.sess.run(self.accuracy, feed_dict={
                    self.train_input:   test_x,
                    self.train_out:     test_y,
                    self.keep_prob:     1.0
                })
                print("epoch %d, training accuracy %g, validate accuracy %g" % (epoch, train_accuracy, validate_accuracy))

            batch_start = batch_end
            batch_end = batch_start + self.batch_size
            if(batch_end > train_x.shape[0]):
                print("reset batch")
                batch_start = 0
                batch_end = batch_start + self.batch_size
                train_x, train_y = self.permutation(train_x, train_y)

        print("training done")

    def permutation(self, x, y):
        # shuffle x and y with the same random order
        sequence = np.random.permutation(x.shape[0])
        return x[sequence], y[sequence]

    def info(self):
        print("c1,s2,c3,s4,f5 shape:")
        print(self.c1.shape)
        print(self.s2.shape)
        print(self.c3.shape)
        print(self.s4.shape)
        print(self.f5.shape)
        print('-'*16)
        # note: train_x / train_y here are the module-level globals defined below
        print(train_x.shape)
        print(train_y.shape)

def dense_to_one_hot(labels_dense, num_classes):
    # convert integer labels of shape (N,) into one-hot rows of shape (N, num_classes)
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

def load_data(filename, train_data=True, split=0.9):
    data_frame = pd.read_csv(filename)
    # (42000, 785)
    print(data_frame.shape)

    train_data_len = data_frame.shape[0]
    train_data_split = int(train_data_len*split)
    print(train_data_split)

    train_x = data_frame.iloc[:train_data_split, 1:].values
    train_x = train_x.astype(np.float)
    train_x = np.multiply(train_x, 1.0/255.0)

    train_y = data_frame.iloc[:train_data_split, 0].values
    train_y = dense_to_one_hot(train_y,10)

    validate_x = data_frame.iloc[train_data_split:, 1:].values
    validate_x = validate_x.astype(np.float)
    validate_x = np.multiply(validate_x, 1.0/255.0)

    validate_y = data_frame.iloc[train_data_split:, 0].values
    validate_y = dense_to_one_hot(validate_y,10)

    print(train_x.shape)
    print(train_y.shape)
    print(validate_x.shape)
    print(validate_y.shape)
    return  train_x, train_y, validate_x, validate_y

train_x, train_y, validate_x, validate_y = load_data('./data/train.csv')

print(train_y.shape)
print(train_y[0:4,])

cnn = DigitsModelCNN()
cnn.build()
cnn.info()

time_start = time.time()
cnn.train(train_x, train_y, validate_x, validate_y)
time_end = time.time()
print("total training time:")
print(time_end-time_start)
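
The post stops at training, but for the competition itself you'd still need a submission file. A hedged sketch of that step, assuming the script above has just run (so cnn holds a live session) and that ./data/test.csv is Kaggle's label-free test file; the competition expects an ImageId,Label CSV:

test_df = pd.read_csv('./data/test.csv')
test_x = np.multiply(test_df.values.astype(np.float), 1.0/255.0)
predictions = []
for start in range(0, test_x.shape[0], 1000):
    # predict in 1000-row chunks to keep each feed bounded in size
    probs = cnn.sess.run(cnn.out, feed_dict={
        cnn.train_input: test_x[start:start+1000],
        cnn.keep_prob: 1.0
    })
    predictions.extend(np.argmax(probs, axis=1))
submission = pd.DataFrame({'ImageId': np.arange(1, len(predictions)+1), 'Label': predictions})
submission.to_csv('./data/submission.csv', index=False)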

This post is licensed under CC BY 4.0 by the author.
