Regression (Appendix 1): when the target function is linear in one variable, its least-squares loss is a bivariate quadratic; solving the loss in Python with full-batch gradient descent, stochastic gradient descent, and mini-batch gradient descent.

xiaoxiao · 2025-07-27
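Before the code, a quick sketch of the algebra the script relies on for the coefficients a through f. For the model Y = W·x + B fitted to samples (x_i, y_i), the least-squares loss expands to a bivariate quadratic in W and B:

$$
L(W,B)=\sum_{i=1}^{n}(Wx_i+B-y_i)^2 = aW^2+bB^2+cWB+dW+eB+f
$$

$$
a=\sum_i x_i^2,\quad b=n,\quad c=2\sum_i x_i,\quad d=-2\sum_i x_i y_i,\quad e=-2\sum_i y_i,\quad f=\sum_i y_i^2
$$

These are exactly the values the script computes from x_data and y_data. The Hessian of L is

$$
H=\begin{pmatrix}2a & c\\ c & 2b\end{pmatrix},\qquad \det H = 4ab-c^2 = 4\Bigl(n\sum_i x_i^2-\bigl(\sum_i x_i\bigr)^2\Bigr)\ge 0
$$

by the Cauchy-Schwarz inequality, so H is positive definite whenever the x_i are not all equal: the surface is an upward-opening bowl with a unique minimum, here at (W, B) = (0.3, 0.15).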

import tensorflow as tf
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
from datetime import datetime
import random

t0 = datetime.now()
x_data = np.random.randn(int(1.0e2)).astype(np.float32)
y_data = x_data * 0.3 + 0.15

# The target function is linear in one variable, so its least-squares loss
# is a quadratic in two variables. In loss1 below, lowercase letters are
# constants and uppercase letters are the variables.
# The more elements x_data has, the larger the coefficients' absolute values
# and the steeper the surface; fewer elements give a flatter surface.
a = np.sum(x_data**2); b = float(len(x_data))
c = 2*np.sum(x_data);  d = -2*np.sum(x_data*y_data)
e = -2*np.sum(y_data); f = np.sum(y_data**2)

# For a bivariate quadratic: if the coefficient matrix of the quadratic form
# is positive definite, the bowl opens upward and the function has a minimum;
# if negative definite, the bowl opens downward and it has a maximum;
# if indefinite, the surface is a saddle.

# Plot the loss function loss1.
fig = plt.figure()
ax = Axes3D(fig)
W = np.arange(-100, 100+0.1, 1)
B = np.arange(-100, 100+0.1, 1)
W, B = np.meshgrid(W, B)  # Creating the grid is the key step; W comes first (x axis), B second (y axis).
loss1 = a*W**2 + b*B**2 + c*W*B + d*W + e*B + f
plt.xlabel('W')
plt.ylabel('B')
ax.plot_surface(W, B, loss1, rstride=1, cstride=1, cmap='rainbow')
plt.show()

# A placeholder declared without a shape adapts to whatever data is fed in.
x_ph = tf.placeholder(tf.float32)
y_ph = tf.placeholder(tf.float32)

# Starting point for the loss-function variables; the minimum lies at (0.3, 0.15).
weight_initial = 1.0e4
bias_initial = 1.0e4
weight = tf.Variable(weight_initial)
bias = tf.Variable(bias_initial)
y_model = weight * x_ph + bias
loss2 = tf.reduce_sum((y_model - y_ph)**2)
loss2_mean = loss2 / len(x_data)

learning_rate = 1e-1  # Crucial setting: too small converges slowly, too large diverges.
train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss2)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print('Initial point: ({}, {})'.format(weight_initial, bias_initial))

step = 0             # iteration counter
loop = 0             # epoch counter
threshold = 1e-5     # tolerance on the mean loss
algorithm = 3        # 1 = full-batch GD, 2 = per-sample SGD, 3 = mini-batch (stochastic) GD
mini_batch_size = 5  # must divide len(x_data) evenly, otherwise the indexing loop below raises an error
flag = True
while flag:
    if algorithm == 1:
        # Feed all of x_data and y_data at once: full-batch gradient descent.
        # A smaller learning_rate works best here.
        sess.run(train_op, feed_dict={x_ph: x_data, y_ph: y_data})
        step += 1
        print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
    elif algorithm == 2:
        # Shuffle the data and feed one (x, y) pair at a time: per-sample SGD.
        # A larger learning_rate works best here.
        random.shuffle(x_data)
        y_data = x_data * 0.3 + 0.15  # Recompute y_data after every shuffle, or x_data and y_data no longer correspond.
        # print('x_data are:', x_data)
        # print('y_data are:', y_data)
        for (x, y) in zip(x_data, y_data):
            sess.run(train_op, feed_dict={x_ph: x, y_ph: y})
            step += 1
            print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
        loop += 1
        print('Finished epoch %s' % loop)
    elif algorithm == 3:
        # Shuffle the data and feed one batch of pairs at a time: mini-batch
        # (stochastic) gradient descent. A moderate learning_rate works best here.
        random.shuffle(x_data)
        y_data = x_data * 0.3 + 0.15  # Recompute y_data after every shuffle, or x_data and y_data no longer correspond.
        for i in range(0, len(x_data), mini_batch_size):
            x_mini_batch = []
            y_mini_batch = []
            for j in range(i, i + mini_batch_size):
                x_mini_batch.append(x_data[j])
                y_mini_batch.append(y_data[j])
            sess.run(train_op, feed_dict={x_ph: x_mini_batch, y_ph: y_mini_batch})
            step += 1
            print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
        loop += 1
        print('Finished epoch %s' % loop)
    if sess.run(loss2_mean, feed_dict={x_ph: x_data, y_ph: y_data}) <= threshold:
        print('Accuracy requirement met')
        break  # exits the enclosing while loop
    if abs(weight.eval(sess)) > weight_initial*2 or abs(bias.eval(sess)) > bias_initial*2:
        print('learning_rate is too large')
        break

plt.plot(x_data, y_data, 'ro', label='Original data')
plt.plot(x_data, sess.run(weight) * x_data + sess.run(bias), label='Fitted line')
plt.legend()
plt.show()
t1 = datetime.now()
print('Elapsed:', t1 - t0)
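The comment about the quadratic form's definiteness can be checked numerically. Below is a small sketch, not part of the original script, that reuses the coefficients a, b, c computed above and confirms both eigenvalues of the Hessian are positive:

# Optional check (assumes a, b, c from the script above are in scope):
# the Hessian of loss1 with respect to (W, B) is [[2a, c], [c, 2b]].
H = np.array([[2*a, c], [c, 2*b]])
print(np.linalg.eigvals(H))  # both eigenvalues positive => positive definite, bowl opens upward
# det(H) = 4ab - c^2 = 4*(n*sum(x_i^2) - (sum x_i)^2) >= 0 by Cauchy-Schwarz.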
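Note that the script uses the TensorFlow 1.x graph/session API (tf.placeholder, tf.Session, tf.train.GradientDescentOptimizer). If you only have TensorFlow 2.x installed, one way to run it unchanged is the v1 compatibility layer that ships with TF 2.x (a sketch, not tested against every 2.x release):

# Replace the first import with the v1 compatibility layer and disable eager mode:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
# The rest of the script then runs as written.

Similarly, on recent matplotlib versions ax = Axes3D(fig) no longer attaches the axes to the figure automatically; ax = fig.add_subplot(projection='3d') is the current equivalent.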
Please credit the original source when reposting: https://www.6miu.com/read-5033796.html
