# 深度学习实践（二）——多层神经网络

xiaoxiao2021-02-28  9

# 一、准备

为了更深入地理解神经网络，笔者基本采用纯 C++ 手写的方式实现：其中矩阵方面的运算调用 OpenCV，数据集则来自公开数据集 a1a。实验环境：

Visual Studio 2017、OpenCV 3.2.0、a1a 数据集

$$Z^{[1]} = W^{[1]}A^{[0]} + b^{[1]}\tag{1}$$
$$A^{[1]} = \mathrm{Relu}(Z^{[1]})\tag{2}$$
$$Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]}\tag{3}$$
$$A^{[2]} = \mathrm{Relu}(Z^{[2]})\tag{4}$$
$$Z^{[3]} = W^{[3]}A^{[2]} + b^{[3]}\tag{5}$$
$$A^{[3]} = \mathrm{Sigmoid}(Z^{[3]})\tag{6}$$
$$\mathcal{L}(A^{[3]}, \hat{Y}) = -\hat{Y}\log\left(A^{[3]}\right) - (1-\hat{Y})\log\left(1-A^{[3]}\right)\tag{7}$$

The cost is then computed by averaging over all training examples:
$$J = \frac{1}{m}\sum_{i=1}^{m}\mathcal{L}\left(A^{[3](i)}, Y^{(i)}\right)\tag{8}$$

# 三、实践

数据集介绍、处理及一些公用的函数已在本系列的上一篇文章中给出，故在此不再赘述（只写出函数声明）。

void creatMat(Mat &x, Mat &y, String fileName);

// Initializes one layer's parameters: weight matrix `w` gets Xavier/Glorot
// uniform values, bias `b` is reset to zero.
//
// Params:
//   w  - output weight matrix, allocated here as n1 x n0 (CV_64FC1).
//   b  - output scalar bias, set to 0.
//   n1 - number of units in the current layer (rows of w, fan-out).
//   n0 - number of units in the previous layer (cols of w, fan-in).
void initial_parermaters(Mat &w, double &b, int n1, int n0) {
	w = Mat::zeros(n1, n0, CV_64FC1);
	b = 0.0;
	// Xavier/Glorot uniform bound: sqrt(6 / (fan_in + fan_out)).
	double limit = sqrt(6.0 / (double)(n1 + n0));
	// Use the process-global RNG instead of a default-constructed `RNG`:
	// a fresh `RNG rng;` always starts from the same fixed seed, so every
	// layer initialized through this function would receive the identical
	// "random" sequence, failing to break symmetry between layers.
	RNG &rng = theRNG();
	for (int i = 0; i < w.rows; i++) {
		for (int j = 0; j < w.cols; j++) {
			w.at<double>(i, j) = rng.uniform(-limit, limit); // Xavier init
		}
	}
}

ReLU 函数的实现：

void relu(const Mat &original, Mat &response) { response = original.clone();//防止维度不同 for (int i = 0; i < original.rows; i++) { for (int j = 0; j < original.cols; j++) { if (original.at<double>(i, j) < 0) { response.at<double>(i, j) = 0.0; } } } }

// One forward step through a layer: z = w * a_prev + b, then a = g(z),
// where g is selected by `activation` ("sigmoid" or "relu").
//
// Params:
//   a_prev     - activations of the previous layer.
//   a          - output activations of this layer. NOTE: left untouched if
//                `activation` matches neither branch (same as the original).
//   w, b       - layer weights and scalar bias.
//   activation - "sigmoid" or "relu".
void linear_activation_forward(Mat &a_prev, Mat &a, Mat &w, double &b, string activation) {
	// The affine part is identical for every activation, so compute it once
	// instead of duplicating the expression in each branch.
	cv::Mat z = (w * a_prev) + b; // scalar b is broadcast over the matrix
	if (activation == "sigmoid") {
		sigmoid(z, a);
	} else if (activation == "relu") {
		relu(z, a);
	}
}

void activation_backward(const Mat &a, const Mat &da, Mat &dz, string activation) { if (activation == "sigmoid") { dz = da.mul(a.mul(1 - a)); } else if (activation == "relu") { dz = da.clone();//保证维度相同 for (int i = 0; i < a.rows; i++) { for (int j = 0; j < a.cols; j++) { if (a.at<double>(i, j) <= 0) { dz.at<double>(i, j) = 0.0; } } } } } void linear_backward(const Mat &da, const Mat &a, const Mat &a_prev, Mat &w, double &b, Mat &dw, double &db, Mat &da_prev, const int m, const double learning_rate, string activation) { cv::Mat dz; activation_backward(a, da, dz, activation);//激活函数的反向传播 dw = (1.0 / m)*dz*a_prev.t(); db = (1.0 / m)*sum(dz)[0]; da_prev = w.t()*dz; w = w - (learning_rate * dw); b = b - (learning_rate * db); }

# 四、实验结果分析

迭代 8000 次的 cost 分析：我们容易发现更高的学习率可以获得较低的 cost 值，但当迭代到一定次数后，cost 会有一定的起伏。

# 五、结语