# Tensorflow学习笔记-神经网络优化

xiaoxiao2021-02-28  0

# 神经网络优化

# Fit y = x1 + x2 (plus small noise) with a single linear layer, trained by
# mini-batch gradient descent on a mean-squared-error loss (TF 1.x graph mode).
import tensorflow as tf
import numpy as np

BATCH_SIZE = 8
seed = 23455

# 32 random samples in [0, 1)^2; each label is x1 + x2 plus uniform
# noise drawn from [-0.05, 0.05).
rdm = np.random.RandomState(seed)
X = rdm.rand(32, 2)
Y_ = [[X1 + X2 + (rdm.rand() / 10.0 - 0.05)] for (X1, X2) in X]

# Placeholders: features are 2-wide, labels 1-wide; batch size left open.
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))

# Single weight matrix, no bias: y = x @ w1.
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)

loss_mse = tf.reduce_mean(tf.square(y_ - y))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss_mse)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 20000
    for i in range(STEPS):
        # Cycle through the 32 samples in batches of 8.
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 500 == 0:
            print("After %d training step(s) " % i)
            print("w1 is ", sess.run(w1))
    print("Final w1 is :\n", sess.run(w1))

tip:可以尝试改变学习率的值，观察参数变化和收敛情况

Final w1 is : [[0.98019385] [1.0159807 ]]

**预测结果**：y = 0.98*X1 + 1.02*X2，与真实关系 y = X1 + X2 基本符合

y_：实际值（标签）；y：预测值（网络前向传播的输出）

### 上述if else语句如何转化成tensorflow语句呢？

loss = tf.reduce_sum(tf.where(tf.greater(y_, y), PROFIT(y_-y), COST(y-y_)))

softmax 函数： 将 n 分类的 n 个输出（y1,y2…yn） 变为满足以下概率分布要求的函数。

ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))

cem = tf.reduce_mean(ce)

# Minimize loss = (w + 1)^2 with plain gradient descent at a fixed
# learning rate of 0.2; the minimum is at w = -1 (TF 1.x graph mode).
import tensorflow as tf
import numpy as np

w = tf.Variable(tf.constant(5, dtype=tf.float32))
loss = tf.square(w + 1)
# learning_rate is 0.2
train_step = tf.train.GradientDescentOptimizer(.2).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    STEPS = 40
    for i in range(STEPS):
        sess.run(train_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print("After %d step(s): w is %f, loss is %f" % (i, w_val, loss_val))

tip:可以改变学习率观察收敛结果和收敛速度

global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay( LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase=True/False)

staircase 设置为 True 时，表示 global_step/learning rate step 取整数，学习 率阶梯型衰减；若 staircase 设置为 false 时，学习率会是一条平滑下降的曲线。

# Same (w + 1)^2 minimization as above, but with an exponentially
# decaying learning rate driven by a global step counter (TF 1.x).
import tensorflow as tf

LEARNING_RATE_BASE = .1    # initial learning rate
LEARNING_RATE_DECAY = .99  # decay factor per decay period
LEARNING_RATE_STEP = 1     # batches between decays; usually total_samples / BATCH_SIZE

# trainable=False keeps the step counter out of the optimizer's variables.
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP,
    LEARNING_RATE_DECAY, staircase=True)

w = tf.Variable(tf.constant(5, dtype=tf.float32))
loss = tf.square(w + 1)
# Passing global_step makes minimize() increment it on every training step,
# which in turn drives the learning-rate decay above.
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 40
    for i in range(STEPS):
        sess.run(train_step)
        learning_rate_val = sess.run(learning_rate)
        global_step_val = sess.run(global_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        # Fixed typo in the original message: "w id %f" -> "w is %f".
        print("After %s steps: global_step is %f, w is %f, "
              "learning rate is %f, loss is %f"
              % (i, global_step_val, w_val, learning_rate_val, loss_val))

ema_op = ema.apply(tf.trainable_variables())

with tf.control_dependencies([train_step, ema_op]): train_op = tf.no_op(name='train')

# Exponential moving average (EMA) demo: after each sess.run(ema_op), the
# shadow value of w1 moves toward the current w1.  Effective decay is
# min(MOVING_AVERAGE_DECAY, (1 + global_step) / (10 + global_step)).
import tensorflow as tf

w1 = tf.Variable(0, dtype=tf.float32)
# Fix: trainable=False keeps the step counter out of tf.trainable_variables();
# in the original, global_step was trainable by default, so ema.apply() below
# wrongly created an EMA shadow for it as well.  Printed values are unchanged.
global_step = tf.Variable(0, trainable=False, dtype=tf.float32)
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# ema.apply builds the op that refreshes a shadow value for every listed
# variable each time sess.run(ema_op) runs.  In practice
# tf.trainable_variables() gathers all trainable parameters automatically
# (here it is equivalent to ema.apply([w1])).
ema_op = ema.apply(tf.trainable_variables())

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run([w1, ema.average(w1)]))  # both start at 0
    sess.run(tf.assign(global_step, 100))   # pretend training reached step 100
    sess.run(tf.assign(w1, 10))             # the parameter jumps to 10
    for _ in range(5):
        # Each update pulls the shadow value a bit closer to w1 = 10.
        sess.run(ema_op)
        print(sess.run([w1, ema.average(w1)]))

# Overfitting: the model scores well on the training set but poorly when
# predicting or classifying new data — i.e. it generalizes badly.

L1正则化：loss_L1(w) = Σᵢ |wᵢ|（对所有参数的绝对值求和，倾向于产生稀疏参数）

L2正则化：loss_L2(w) = Σᵢ wᵢ²（对所有参数的平方求和，使参数整体趋向较小的值）

loss = loss(y 与 y_) + REGULARIZER*loss(w)

# L2-regularization demo: classify points inside/outside the circle
# x0^2 + x1^2 < 2 with a small 2-11-1 network.  Pattern: each weight's L2
# penalty is registered in the 'losses' collection at creation time, then
# summed into the total loss:
#     tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
#     loss = cem + tf.add_n(tf.get_collection('losses'))
# (The original snippet was missing the closing parenthesis on the first line.)
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

LEARNING_RATE_BASE = .001
LEARNING_RATE_DECAY = .999
BATCH_SIZE = 30
seed = 2

# 300 points drawn from N(0,1)^2; label 1 inside the circle, 0 outside.
rdm = np.random.RandomState(seed)
X = rdm.randn(300, 2)
Y_ = [int((x0 * x0 + x1 * x1) < 2) for (x0, x1) in X]
Y_c = [["red" if y else "blue"] for y in Y_]
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.savefig('original.png')
plt.show()


def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 penalty in 'losses'."""
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection(
        'losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w


def get_bias(shape):
    """Create a bias variable initialized to 0.01."""
    b = tf.Variable(tf.constant(.01, shape=shape))
    return b


x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))

# 2 -> 11 -> 1 network with one ReLU hidden layer.
w1 = get_weight([2, 11], .01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
w2 = get_weight([11, 1], .01)
b2 = get_bias([1])
y = tf.matmul(y1, w2) + b2  # raw score, no output activation

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    LEARNING_RATE_BASE, global_step, 300 / BATCH_SIZE,
    LEARNING_RATE_DECAY, staircase=True)

loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Bug fix: the original minimized loss_mse, so the L2 penalties collected
# above were never applied; train on loss_total instead.
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 40000
    for i in range(STEPS):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_val = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print("After %d steps, loss is %f" % (i, loss_mse_val))

    # Evaluate the network on a dense grid to draw the decision boundary.
    xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = sess.run(y, feed_dict={x: grid})
    probs = probs.reshape(xx.shape)
    # Fixed copy-pasted labels: the original printed "w1:" for all four.
    print("w1:\n", sess.run(w1))
    print("b1:\n", sess.run(b1))
    print("w2:\n", sess.run(w2))
    print("b2:\n", sess.run(b2))

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])  # 0.5 contour = decision boundary
plt.savefig('result.png')
plt.show()