To monitor how the gradients in each layer change during training, I followed this post: http://blog.csdn.net/yihaizhiyan/article/details/44159063
However, solver.cpp has been refactored in newer versions of Caffe, so the place where the extra code goes has moved; this note records the updated locations.
File to modify: caffe/src/caffe/solver.cpp
The top of the file is changed as follows:
#include &lt;cstdio&gt;

#include &lt;algorithm&gt;
#include &lt;string&gt;
#include &lt;vector&gt;
#include &lt;iomanip&gt;

#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/upgrade_proto.hpp"

using std::max;
using std::min;
using std::setw;
using std::scientific;
using std::left;
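Relative to stock solver.cpp, the additions are essentially &lt;iomanip&gt; and the stream-manipulator using-declarations (setw, scientific, left), which the logging added below relies on. As a quick sanity check of the column alignment they produce, here is a toy program independent of Caffe (layer names and values are made up):

#include &lt;iomanip&gt;
#include &lt;iostream&gt;
#include &lt;string&gt;

int main() {
  // Pad each name to the longest name's width, mirroring the
  // setw(max_len + 1) call added to Solver::Step() below.
  const std::string names[] = { "conv1", "fc_final" };
  const int max_len = 8;  // length of "fc_final", the longest name
  for (int i = 0; i < 2; ++i) {
    std::cout << std::left << std::setw(max_len + 1) << std::setfill(' ')
              << names[i] << " blob0: " << std::scientific
              << 0.015 << " [" << 0.0003 << "]" << std::endl;
  }
  return 0;
}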
Further down in the file, Step() is modified as follows:
template &lt;typename Dtype&gt;
void Solver&lt;Dtype&gt;::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;
  iteration_timer_.Start();

  while (iter_ < stop_iter) {
    // zero-init the params
    net_->ClearParamDiffs();
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())) {
      if (Caffe::root_solver()) {
        TestAll();
      }
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    // accumulate the loss and gradient
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();
    // average the loss across iterations for smoothed reporting
    UpdateSmoothedLoss(loss, start_iter, average_loss);

    if (display) {
      float lapse = iteration_timer_.Seconds();
      float per_s = (iter_ - iterations_last_) / (lapse ? lapse : 1);
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << " (" << per_s << " iter/s, " << lapse << "s/"
          << param_.display() << " iters), loss = " << smoothed_loss_;
      iteration_timer_.Start();
      iterations_last_ = iter_;
      const vector&lt;Blob&lt;Dtype&gt;*&gt;& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << "    Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }

      // add by me: log the mean absolute value of each parameter blob's
      // weights (data) and gradients (diff)
      int max_len = 0;
      for (int l = 0; l < net_->layers().size(); ++l) {
        Layer&lt;Dtype&gt;& layer = *net_->layers()[l].get();
        if (layer.blobs().size() > 0
            && layer.layer_param().name().length() > max_len) {
          max_len = layer.layer_param().name().length();
        }
      }
      for (int l = 0; l < net_->layers().size(); ++l) {
        Layer&lt;Dtype&gt;& layer = *net_->layers()[l].get();
        for (int b = 0; b < layer.blobs().size(); ++b) {
          Blob&lt;Dtype&gt;& blob = *layer.blobs()[b].get();
          const Dtype* blob_cpu_data = blob.cpu_data();
          const Dtype* blob_cpu_diff = blob.cpu_diff();
          Dtype data_sum = 0;
          Dtype diff_sum = 0;
          for (int i = 0; i < blob.count(); ++i) {
            data_sum += (blob_cpu_data[i] > Dtype(0.)) ?
                blob_cpu_data[i] : -blob_cpu_data[i];
            diff_sum += (blob_cpu_diff[i] > Dtype(0.)) ?
                blob_cpu_diff[i] : -blob_cpu_diff[i];
          }
          data_sum /= blob.count();
          diff_sum /= blob.count();
          LOG(INFO) << std::left << std::setw(max_len + 1)
              << std::setfill(' ') << layer.layer_param().name()
              << " blob" << b << ": " << std::scientific
              << data_sum << " [" << diff_sum << "]";
        }
      }
      // end revised by me
    }
    // ... the rest of Step() below this point is unchanged ...
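What the added block logs for every parameter blob is simply the mean absolute value of its weights (data) and of its gradients (diff). The following standalone sketch (independent of Caffe, with made-up numbers) reproduces that reduction, which can be handy for checking the arithmetic without rebuilding Caffe:

#include &lt;cmath&gt;
#include &lt;cstddef&gt;
#include &lt;iostream&gt;

// Mean absolute value over a raw buffer -- the same reduction the added
// block runs over blob.cpu_data() and blob.cpu_diff().
float mean_abs(const float* values, std::size_t count) {
  float sum = 0.0f;
  for (std::size_t i = 0; i < count; ++i) {
    sum += std::fabs(values[i]);
  }
  return sum / count;
}

int main() {
  const float data[] = { 0.5f, -0.25f, 0.75f };   // pretend weights
  const float diff[] = { 0.01f, -0.02f, 0.03f };  // pretend gradients
  std::cout << std::scientific << mean_abs(data, 3)
            << " [" << mean_abs(diff, 3) << "]" << std::endl;
  return 0;
}

When reading the resulting log, a [diff] column that shrinks layer by layer toward the input while the data column stays flat is the classic sign of vanishing gradients, which is exactly what this modification is meant to expose.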