参考:https://github.com/sujaybabruwad/LeNet-in-Tensorflow/blob/master/LeNet-Lab-Solution.ipynb
代码考过来如下:lenet-5.py
from tensorflow.examples.tutorials.mnist import input_data import numpy as np import random import numpy as np #import matplotlib.pyplot as plt from sklearn.utils import shuffle import tensorflow as tf from tensorflow.contrib.layers import flatten mnist = input_data.read_data_sets("MNIST_data/", reshape=False) X_train, y_train = mnist.train.images, mnist.train.labels X_validation, y_validation = mnist.validation.images, mnist.validation.labels X_test, y_test = mnist.test.images, mnist.test.labels assert(len(X_train) == len(y_train)) assert(len(X_validation) == len(y_validation)) assert(len(X_test) == len(y_test)) print(" ") print("Image Shape: {}".format(X_train[0].shape)) print(" ") print("Training Set: {} samples".format(len(X_train))) print("Validation Set: {} samples".format(len(X_validation))) print("Test Set: {} samples".format(len(X_test))) # Pad images with 0s X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant') X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant') X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant') print("Updated Image Shape: {}".format(X_train[0].shape)) index = random.randint(0, len(X_train)) image = X_train[index].squeeze() #plt.figure(figsize=(1,1)) #plt.imshow(image, cmap="gray") print(y_train[index]) X_train, y_train = shuffle(X_train, y_train) EPOCHS = 10 BATCH_SIZE = 128 from tensorflow.contrib.layers import flatten def LeNet(x): # Hyperparameters mu = 0 sigma = 0.1 # SOLUTION: Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6. conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean = mu, stddev = sigma)) conv1_b = tf.Variable(tf.zeros(6)) conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b # SOLUTION: Activation. conv1 = tf.nn.relu(conv1) # SOLUTION: Pooling. Input = 28x28x6. Output = 14x14x6. conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') # SOLUTION: Layer 2: Convolutional. Output = 10x10x16. conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma)) conv2_b = tf.Variable(tf.zeros(16)) conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b # SOLUTION: Activation. conv2 = tf.nn.relu(conv2) # SOLUTION: Pooling. Input = 10x10x16. Output = 5x5x16. conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') # SOLUTION: Flatten. Input = 5x5x16. Output = 400. fc0 = flatten(conv2) # SOLUTION: Layer 3: Fully Connected. Input = 400. Output = 120. fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma)) fc1_b = tf.Variable(tf.zeros(120)) fc1 = tf.matmul(fc0, fc1_W) + fc1_b # SOLUTION: Activation. fc1 = tf.nn.relu(fc1) # SOLUTION: Layer 4: Fully Connected. Input = 120. Output = 84. fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma)) fc2_b = tf.Variable(tf.zeros(84)) fc2 = tf.matmul(fc1, fc2_W) + fc2_b # SOLUTION: Activation. fc2 = tf.nn.relu(fc2) # SOLUTION: Layer 5: Fully Connected. Input = 84. Output = 10. fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 10), mean = mu, stddev = sigma)) fc3_b = tf.Variable(tf.zeros(10)) logits = tf.matmul(fc2, fc3_W) + fc3_b return logits x = tf.placeholder(tf.float32, (None, 32, 32, 1)) y = tf.placeholder(tf.int32, (None)) one_hot_y = tf.one_hot(y, 10) rate = 0.001 logits = LeNet(x) cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y) loss_operation = tf.reduce_mean(cross_entropy) optimizer = tf.train.AdamOptimizer(learning_rate = rate) training_operation = optimizer.minimize(loss_operation) correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1)) accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver() def evaluate(X_data, y_data): num_examples = len(X_data) total_accuracy = 0 sess = tf.get_default_session() for offset in range(0, num_examples, BATCH_SIZE): batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE] accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y}) total_accuracy += (accuracy * len(batch_x)) return total_accuracy / num_examples with tf.Session() as sess: sess.run(tf.global_variables_initializer()) num_examples = len(X_train) print("Training...") print(" ") for i in range(EPOCHS): X_train, y_train = shuffle(X_train, y_train) for offset in range(0, num_examples, BATCH_SIZE): end = offset + BATCH_SIZE batch_x, batch_y = X_train[offset:end], y_train[offset:end] sess.run(training_operation, feed_dict={x: batch_x, y: batch_y}) validation_accuracy = evaluate(X_validation, y_validation) print("EPOCH {} ...".format(i+1)) print("Validation Accuracy = {:.3f}".format(validation_accuracy)) print(" ") saver.save(sess, 'lenet') print("Model saved") with tf.Session() as sess: saver.restore(sess, tf.train.latest_checkpoint('.')) test_accuracy = evaluate(X_test, y_test) print("Test Accuracy = {:.3f}".format(test_accuracy))其中cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)原文是没有logits=和 labels=的,这样会报错:ValueError: Only call softmax_cross_entropy_with_logits with named arguments (labels=…, logits=…, …)