To get a better feel for how the gradient descent algorithm behaves, I wrote the code below.
It first generates points on the line y = 2x and adds random Gaussian noise to them.
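For reference, the objective the script minimizes is the mean squared error of the one-parameter model y ≈ w·x:

    L(w) = (1/N) · Σ_i (w·x_i − y_i)^2
    dL/dw = (2/N) · Σ_i x_i·(w·x_i − y_i)

compute_gradient_full evaluates dL/dw over all N points, while the SGD and mini-batch variants estimate the same quantity from a single random sample or from 16 random samples, respectively.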
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 30 15:27:51 2017
@author: liuxy
"""
import numpy as np
import matplotlib.pyplot as plt


def gen_data(size):
    # Points on the line y = 2x, perturbed with Gaussian noise (std = 3).
    x = np.arange(0, size, 1)
    e = np.random.normal(0, 3, size)
    y = 2 * x + e
    return [x, y]


def compute_gradient_full(data, w):
    # Full-batch gradient: average over the entire dataset.
    X, Y = data
    N = len(X)
    g = np.sum(2 * X * (X * w - Y)) / N
    return g


def compute_gradient_SGD(data, w):
    # Stochastic gradient: a single randomly chosen sample.
    X, Y = data
    # np.random.randint excludes its upper bound, so use len(X);
    # the original len(X)-1 could never sample the last point.
    idx = np.random.randint(0, len(X))
    d = X[idx]
    t = Y[idx]
    g = 2 * d * (d * w - t)
    return g


def compute_gradient_miniBatch(data, w):
    # Mini-batch gradient: average over 16 samples drawn with replacement.
    X, Y = data
    N = 16
    X_b = []
    Y_b = []
    for i in range(N):
        idx = np.random.randint(0, len(X))
        X_b.append(X[idx])
        Y_b.append(Y[idx])
    X_ba = np.array(X_b)
    Y_ba = np.array(Y_b)
    g = np.sum(2 * X_ba * (X_ba * w - Y_ba)) / N
    return g


def Optimizer(data, w, learning_rate, num_iterator, method, Wts):
    # Run num_iterator gradient steps, recording the weight after each step.
    for i in range(num_iterator):
        if method == 'full':
            g = compute_gradient_full(data, w)
        elif method == 'mini':
            g = compute_gradient_miniBatch(data, w)
        elif method == 'sgd':
            g = compute_gradient_SGD(data, w)
        else:
            raise ValueError('unknown method: %s' % method)
        w = w - learning_rate * g
        Wts.append(w)


data = gen_data(100)
# plt.scatter(data[0], data[1])

lr = 0.000020
w = 6          # deliberately wrong starting weight (the true slope is 2)
num = 100

Weights_full = [w]
Weights_mini = [w]
Weights_sgd = [w]

Optimizer(data, w, lr, num, 'full', Weights_full)
Optimizer(data, w, lr, num, 'mini', Weights_mini)
Optimizer(data, w, lr, num, 'sgd', Weights_sgd)

plt.plot(np.arange(0, num + 1), Weights_full, label='full')
plt.plot(np.arange(0, num + 1), Weights_mini, label='mini batch')
plt.plot(np.arange(0, num + 1), Weights_sgd, label='sgd')
plt.xlabel('iteration')
plt.ylabel('w')
plt.legend()
plt.show()
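As a quick sanity check (not part of the original script), the converged weights can be compared against the closed-form least-squares solution for this one-parameter model, w* = Σ x_i·y_i / Σ x_i²; all three trajectories should end up near it, and near the true slope 2:

# Hypothetical check, assuming it is appended after the script above.
X, Y = data
w_star = np.sum(X * Y) / np.sum(X * X)   # closed-form least-squares slope
print('closed-form w* :', w_star)        # should be close to 2
print('full-batch GD  :', Weights_full[-1])
print('mini-batch GD  :', Weights_mini[-1])
print('SGD            :', Weights_sgd[-1])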
[Figure: evolution of the weight w over iterations for full-batch, mini-batch, and SGD]
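One note on the step size (my own back-of-the-envelope estimate, not from the original post): since x runs from 0 to 99, the curvature of the loss is L''(w) = (2/N)·Σ x_i² = 2·328350/100 ≈ 6567, so full-batch gradient descent is only stable for learning rates below 2/6567 ≈ 3e-4. The chosen lr = 0.000020 sits comfortably inside that range, which is why the full-batch curve decays smoothly toward 2 while the SGD curve settles into a noisy band around it.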