上次发现越复杂的网络,拟合能力越强,但是 test 集和 train 集的准确率差别也变大,说明有过拟合的倾向。下面是代码:
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Baseline experiment: a 784-2000-1000-100-10 fully connected network on
# MNIST without any regularisation, used to observe the train/test gap.

# MNIST ships as an official TensorFlow example; downloading, splitting and
# float conversion are handled by input_data (its source is in the TF tree).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Fraction of each GPU's memory this process may claim.
# Without a GPU a plain `sess = tf.Session()` is enough.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Size of each mini-batch.
batch_size = 200
# Number of training epochs.
train_epoch = 10
# Evaluate and print every n epochs.
test_epoch_n = 1
# Number of mini-batches per epoch.
n_batch = mnist.train.num_examples // batch_size
print("batch_size="+str(batch_size)+"n_batch="+str(n_batch))

# Placeholders for the flattened 28x28 input images and the one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Weights start from a truncated normal (stddev 0.1); biases start at 0.1.
# tf.nn.tanh or tf.nn.relu can be swapped in for tf.nn.sigmoid in the hidden
# layers below to compare activations.
W1 = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b1 = tf.Variable(tf.zeros([2000]) + 0.1)
L1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)

W2 = tf.Variable(tf.truncated_normal([2000, 1000], stddev=0.1))
b2 = tf.Variable(tf.zeros([1000]) + 0.1)
L2 = tf.nn.sigmoid(tf.matmul(L1, W2) + b2)

W3 = tf.Variable(tf.truncated_normal([1000, 100], stddev=0.1))
b3 = tf.Variable(tf.zeros([100]) + 0.1)
L3 = tf.nn.sigmoid(tf.matmul(L2, W3) + b3)

W4 = tf.Variable(tf.truncated_normal([100, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
# The output layer stays linear: softmax_cross_entropy_with_logits applies
# softmax itself, so it must receive raw logits. The previous version fed it
# softmax(sigmoid(...)), i.e. squashed the scores twice before the loss.
logits = tf.matmul(L3, W4) + b4

# Class probabilities, used only for prediction / accuracy.
prediction = tf.nn.softmax(logits)

# Loss: mean softmax cross-entropy over the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# Optimiser. Alternative:
#   tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

# Accuracy ops are built once, up front. Creating them inside the epoch loop
# would add new nodes to the graph on every evaluation.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialise all variables in the session.
init = tf.global_variables_initializer()
sess.run(init)

MaxACC = 0  # best test accuracy seen so far
saver = tf.train.Saver()
# Make sure the checkpoint directory exists; saving may fail otherwise.
os.makedirs("Model", exist_ok=True)

# Train for train_epoch epochs.
for epoch in range(train_epoch):
    for batch in range(n_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})

    if epoch % test_epoch_n == 0:
        # Periodic evaluation on the full test and training sets.
        now_acc = sess.run(
            accuracy,
            feed_dict={x: mnist.test.images, y: mnist.test.labels})
        train_acc = sess.run(
            accuracy,
            feed_dict={x: mnist.train.images, y: mnist.train.labels})
        print('epoch=', epoch, 'ACC=', now_acc, 'train acc =', train_acc)
        # Checkpoint only when the test accuracy improves.
        if now_acc > MaxACC:
            MaxACC = now_acc
            saver.save(sess, "Model/ModelSoftmax.ckpt")
            print('Save model! Now ACC=', MaxACC)

# Final test accuracy of the last-epoch weights.
print('Train OK! epoch=', epoch, 'ACC=',
      sess.run(accuracy,
               feed_dict={x: mnist.test.images, y: mnist.test.labels}))

# Close the training session.
sess.close()

# Reload the best checkpoint in a fresh session and re-evaluate it.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    # Note the "./" prefix on the restore path.
    saver.restore(sess, "./Model/ModelSoftmax.ckpt")
    print('Load Model OK!')
    print('ACC=',
          sess.run(accuracy,
                   feed_dict={x: mnist.test.images, y: mnist.test.labels}))

# Final output recorded by the author. It was produced by the earlier version
# of this script (sigmoid + double softmax on the output layer), so a re-run
# of the code above will give different numbers.
# epoch= 0 ACC= 0.7969 train acc = 0.799182
# Save model! Now ACC= 0.7969
# epoch= 1 ACC= 0.9566 train acc = 0.962764
# Save model! Now ACC= 0.9566
# epoch= 2 ACC= 0.9613 train acc = 0.968036
# Save model! Now ACC= 0.9613
# epoch= 3 ACC= 0.9677 train acc = 0.975109
# Save model! Now ACC= 0.9677
# epoch= 4 ACC= 0.9698 train acc = 0.977145
# Save model! Now ACC= 0.9698
# epoch= 5 ACC= 0.9687 train acc = 0.979345
# epoch= 6 ACC= 0.9653 train acc = 0.974636
# epoch= 7 ACC= 0.9693 train acc = 0.980545
# epoch= 8 ACC= 0.9694 train acc = 0.981091
# epoch= 9 ACC= 0.9654 train acc = 0.978727
# Train OK! epoch= 9 ACC= 0.9654
加入dropout
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Dropout experiment: a 784-2000-1000-100-10 fully connected network on
# MNIST with dropout after every hidden layer.

# MNIST ships as an official TensorFlow example; downloading, splitting and
# float conversion are handled by input_data (its source is in the TF tree).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Fraction of each GPU's memory this process may claim.
# Without a GPU a plain `sess = tf.Session()` is enough.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Size of each mini-batch.
batch_size = 200
# Number of training epochs.
train_epoch = 10
# Evaluate and print every n epochs.
test_epoch_n = 1
# Number of mini-batches per epoch.
n_batch = mnist.train.num_examples // batch_size
print("batch_size="+str(batch_size)+"n_batch="+str(n_batch))

# Placeholders for the flattened 28x28 input images and the one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
# Dropout keep probability; fed as 0.7 while training and 1.0 (dropout
# disabled) for every evaluation below.
keep_prob = tf.placeholder(tf.float32)

# Weights start from a truncated normal (stddev 0.1); biases start at 0.1.
W1 = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b1 = tf.Variable(tf.zeros([2000]) + 0.1)
L1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)
L1_drop = tf.nn.dropout(L1, keep_prob)

W2 = tf.Variable(tf.truncated_normal([2000, 1000], stddev=0.1))
b2 = tf.Variable(tf.zeros([1000]) + 0.1)
L2 = tf.nn.sigmoid(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)

W3 = tf.Variable(tf.truncated_normal([1000, 100], stddev=0.1))
b3 = tf.Variable(tf.zeros([100]) + 0.1)
L3 = tf.nn.sigmoid(tf.matmul(L2_drop, W3) + b3)
L3_drop = tf.nn.dropout(L3, keep_prob)

W4 = tf.Variable(tf.truncated_normal([100, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
# Two fixes on the output layer:
#  * it now reads L3_drop; the previous version read L3, which left the
#    third dropout op unused;
#  * it stays linear, because softmax_cross_entropy_with_logits applies
#    softmax itself and must receive raw logits. The previous version fed it
#    softmax(sigmoid(...)), i.e. squashed the scores twice before the loss.
logits = tf.matmul(L3_drop, W4) + b4

# Class probabilities, used only for prediction / accuracy.
prediction = tf.nn.softmax(logits)

# Loss: mean softmax cross-entropy over the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# Optimiser. Alternative:
#   tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
train_step = tf.train.AdamOptimizer(1e-2).minimize(cross_entropy)

# Accuracy ops are built once, up front. Creating them inside the epoch loop
# would add new nodes to the graph on every evaluation.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialise all variables in the session.
init = tf.global_variables_initializer()
sess.run(init)

MaxACC = 0  # best test accuracy seen so far
saver = tf.train.Saver()
# Make sure the checkpoint directory exists; saving may fail otherwise.
os.makedirs("Model", exist_ok=True)

# Train for train_epoch epochs.
for epoch in range(train_epoch):
    for batch in range(n_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_step,
                 feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.7})

    if epoch % test_epoch_n == 0:
        # Periodic evaluation on the full test and training sets, with
        # dropout switched off (keep_prob = 1.0).
        now_acc = sess.run(
            accuracy,
            feed_dict={x: mnist.test.images, y: mnist.test.labels,
                       keep_prob: 1.0})
        train_acc = sess.run(
            accuracy,
            feed_dict={x: mnist.train.images, y: mnist.train.labels,
                       keep_prob: 1.0})
        print('epoch=', epoch, 'ACC=', now_acc, 'train acc =', train_acc)
        # Checkpoint only when the test accuracy improves.
        if now_acc > MaxACC:
            MaxACC = now_acc
            saver.save(sess, "Model/ModelSoftmax.ckpt")
            print('Save model! Now ACC=', MaxACC)

# Final test accuracy of the last-epoch weights.
print('Train OK! epoch=', epoch, 'ACC=',
      sess.run(accuracy,
               feed_dict={x: mnist.test.images, y: mnist.test.labels,
                          keep_prob: 1.0}))

# Close the training session.
sess.close()

# Reload the best checkpoint in a fresh session and re-evaluate it.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    # Note the "./" prefix on the restore path.
    saver.restore(sess, "./Model/ModelSoftmax.ckpt")
    print('Load Model OK!')
    print('ACC=',
          sess.run(accuracy,
                   feed_dict={x: mnist.test.images, y: mnist.test.labels,
                              keep_prob: 1.0}))

# Final result recorded by the author. It was produced by the earlier version
# of this script (output layer read L3 instead of L3_drop, and used sigmoid +
# double softmax), so a re-run of the code above will give different numbers.
# epoch= 0 ACC= 0.856 train acc = 0.858455
# Save model! Now ACC= 0.856
# epoch= 1 ACC= 0.8622 train acc = 0.866527
# Save model! Now ACC= 0.8622
# epoch= 2 ACC= 0.884 train acc = 0.893582
# Save model! Now ACC= 0.884
# epoch= 3 ACC= 0.9656 train acc = 0.971473
# Save model! Now ACC= 0.9656
# epoch= 4 ACC= 0.9663 train acc = 0.972618
# Save model! Now ACC= 0.9663
# epoch= 5 ACC= 0.969 train acc = 0.977655
# Save model! Now ACC= 0.969
# epoch= 6 ACC= 0.9661 train acc = 0.975982
# epoch= 7 ACC= 0.9648 train acc = 0.9736
# epoch= 8 ACC= 0.9698 train acc = 0.979345
# Save model! Now ACC= 0.9698
# epoch= 9 ACC= 0.9699 train acc = 0.978418
# Save model! Now ACC= 0.9699
# Train OK! epoch= 9 ACC= 0.9699
好像也没有好太多,可能是因为过拟合的情况本来就不够严重。