Deep-learning/digit recognition-CNN1.py at master · vamsiry/Deep-learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 03:28:08 2017

@author: vreddy
"""

from tensorflow.contrib import learn
from tensorflow.contrib import layers
from tensorflow.contrib import losses
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from sklearn import metrics
import numpy as np

# Surface INFO-level TensorFlow log messages.
tf.logging.set_verbosity(tf.logging.INFO)

# Load the MNIST digit images (28 x 28 = 784 pixels each, flattened).
# Labels are the digit values 0-9, one-hot encoded into 10 columns.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Split out images (x) and labels (y) for each of the three data partitions.
x_train, y_train = mnist.train.images, mnist.train.labels
x_validation, y_validation = mnist.validation.images, mnist.validation.labels
x_test, y_test = mnist.test.images, mnist.test.labels


# Hidden layers (convolutional and fully connected) use ReLU activations
# Output layer is linear; softmax is applied on top of it so the 10 outputs can be read as class probabilities
def model_function(features, targets, mode):
    """Build the CNN graph used by the estimator for digit classification.

    Layout: reshape -> conv (32 filters, 5x5) -> max-pool (2x2) ->
    conv (64 filters, 5x5) -> max-pool (2x2) -> flatten ->
    fully connected (100, then 20 units) -> linear output (10 units).

    Args:
        features: flattened images; reshaped below to [-1, 28, 28, 1].
        targets: one-hot labels (10 columns), or None when the estimator
            calls this function without labels (prediction).
        mode: estimator mode key. Not consulted here; the labels check
            below decides whether loss and training ops are built.

    Returns:
        A (predictions, loss, train_op) tuple. `predictions` is a dict with
        'probs' (softmax over the 10 outputs) and 'labels' (index of the
        most probable class). `loss` and `train_op` are None when `targets`
        is None.
    """
    # Input layer: 4-D tensor [batch_size, 28, 28, 1].
    # -1 lets TensorFlow infer the batch size; the trailing 1 is the channel
    # count (grey-scale images; a colour image would have 3 channels).
    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Convolution layer 1: 32 feature maps from 5x5 filters.
    # "SAME" padding keeps the feature map the same size as the input.
    # Input:  [batch_size, 28, 28, 1]
    # Output: [batch_size, 28, 28, 32]
    conv1 = layers.conv2d(
        inputs=input_layer,
        num_outputs=32,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",
        activation_fn=tf.nn.relu)

    # Pooling layer 1: 2x2 window with stride 2 halves width and height.
    # Input:  [batch_size, 28, 28, 32]
    # Output: [batch_size, 14, 14, 32]
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolution layer 2: 64 feature maps computed over the 32 input channels.
    # Input:  [batch_size, 14, 14, 32]
    # Output: [batch_size, 14, 14, 64]
    conv2 = layers.conv2d(
        inputs=pool1,
        num_outputs=64,
        kernel_size=[5, 5],
        stride=1,
        padding="SAME",
        activation_fn=tf.nn.relu)

    # Pooling layer 2.
    # Input:  [batch_size, 14, 14, 64]
    # Output: [batch_size, 7, 7, 64]
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten the pooled maps so they can feed the fully connected layers.
    # Input:  [batch_size, 7, 7, 64]
    # Output: [batch_size, 7*7*64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Two fully connected ReLU layers with 100 and 20 neurons.
    # Input:  [batch_size, 7*7*64]
    # Output: [batch_size, 20]
    # NOTE(review): the regularizer only registers its penalty terms with the
    # graph; `loss` below is the plain cross-entropy, so these penalties do
    # not appear to influence training - confirm whether that is intended
    # before wiring them in (scales of 1.0 / 2.0 would dominate the loss).
    fclayers = layers.stack(
        pool2_flat, layers.fully_connected, [100, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    # Output layer: 10 linear units, one per digit (0 to 9).
    # No activation here because the softmax is applied inside the
    # cross-entropy loss and, separately, for the predictions below.
    outputs = layers.fully_connected(inputs=fclayers,
                                     num_outputs=10,
                                     activation_fn=None)

    # Predicted class is the output unit with the highest probability;
    # both the probabilities and the resulting label are returned.
    probs = tf.nn.softmax(outputs)
    predictions = {'probs': probs, 'labels': tf.argmax(probs, 1)}

    # Without labels there is nothing to compute a loss against, so return
    # the predictions alone instead of failing inside the loss function.
    if targets is None:
        return predictions, None, None

    # Softmax cross-entropy between the linear outputs and one-hot targets.
    loss = losses.softmax_cross_entropy(outputs, targets)

    # Plain SGD step that also advances the global step counter.
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.1,
        optimizer="SGD")

    return predictions, loss, train_op


# Estimator built around model_function; model_dir is passed as its model directory.
classifier = learn.Estimator(model_fn=model_function, model_dir='/home/vreddy/Modeldir/cnn')

# Train for 500 steps with mini-batches of 100 images.
classifier.fit(x=x_train, y=y_train, steps=500, batch_size=100)

# Dump every model variable: name, shape and value.
# Single-argument print(...) calls emit the same text as the former
# Python 2 print statements and also parse under Python 3.
for var in classifier.get_variable_names():
    value = classifier.get_variable_value(var)  # fetch once, use twice
    print("%s :  %s  -  %s" % (var, value.shape, value))

# Evaluate the model using the validation set
results = classifier.evaluate(x=x_validation, y=y_validation, steps=1)
for key in sorted(results):
    print("%s:%s" % (key, results[key]))

# Predict the outcome of test data using the model, one example at a time
predictions = classifier.predict(x_test, as_iterable=True)
for i, p in enumerate(predictions):
    print("Prediction %s: %s, probs = %s" % (i+1, p["labels"], p["probs"]))

# Compute the accuracy metrics.
# as_iterable=False is passed explicitly so all predictions come back together
# as one dict; indexing by 'labels' would fail on an iterator.
predictions = classifier.predict(x_test, as_iterable=False)
test_accuracy = metrics.accuracy_score(np.argmax(y_test, 1), predictions['labels'])
print("Test accuracy: %s" % test_accuracy)

# Check how well the model fits the training data
predictions = classifier.predict(x_train, as_iterable=False)
train_accuracy = metrics.accuracy_score(np.argmax(y_train, 1), predictions['labels'])
print("Train accuracy: %s" % train_accuracy)