Displaying per-layer gradient information while Caffe is running


To display how each layer's gradients change during training, I read this blog post: http://blog.csdn.net/yihaizhiyan/article/details/44159063

However, the newer version of Caffe reorganized solver.cpp, so the place where the logging code must be added has changed. I record the new locations here.

File to modify: caffe/src/caffe/solver.cpp

At the top of the file, change the includes as follows, so that <iomanip> and the stream formatters (std::setw, std::scientific, std::left) used by the logging code below are available:

#include <cstdio>
#include <algorithm>
#include <string>
#include <vector>
#include <iomanip>

#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/upgrade_proto.hpp"

using std::max;
using std::min;
using std::setw;
using std::scientific;
using std::left;

Further down, modify Solver<Dtype>::Step() as follows; the block between the "added by me" comments is the new gradient logging:

template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;
  iteration_timer_.Start();

  while (iter_ < stop_iter) {
    // zero-init the params
    net_->ClearParamDiffs();
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())) {
      if (Caffe::root_solver()) {
        TestAll();
      }
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    // accumulate the loss and gradient
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();
    // average the loss across iterations for smoothed reporting
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (display) {
      float lapse = iteration_timer_.Seconds();
      float per_s = (iter_ - iterations_last_) / (lapse ? lapse : 1);
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << " (" << per_s << " iter/s, " << lapse << "s/"
          << param_.display() << " iters), loss = " << smoothed_loss_;
      iteration_timer_.Start();
      iterations_last_ = iter_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << "    Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }

      // ---- added by me: report mean |data| and mean |diff| per blob ----
      // Find the longest layer name so the output columns line up.
      int max_len = 0;
      for (int l = 0; l < net_->layers().size(); ++l) {
        Layer<Dtype>& layer = *net_->layers()[l].get();
        if (layer.blobs().size() > 0 &&
            static_cast<int>(layer.layer_param().name().length()) > max_len) {
          max_len = layer.layer_param().name().length();
        }
      }
      // For every learnable blob, average the absolute values of its
      // parameters (data) and of its gradients (diff).
      for (int l = 0; l < net_->layers().size(); ++l) {
        Layer<Dtype>& layer = *net_->layers()[l].get();
        for (int b = 0; b < layer.blobs().size(); ++b) {
          Blob<Dtype>& blob = *layer.blobs()[b].get();
          const Dtype* blob_cpu_data = blob.cpu_data();
          const Dtype* blob_cpu_diff = blob.cpu_diff();
          Dtype data_sum = 0;
          Dtype diff_sum = 0;
          for (int i = 0; i < blob.count(); ++i) {
            data_sum += (blob_cpu_data[i] > Dtype(0.)) ?
                blob_cpu_data[i] : -blob_cpu_data[i];
            diff_sum += (blob_cpu_diff[i] > Dtype(0.)) ?
                blob_cpu_diff[i] : -blob_cpu_diff[i];
          }
          data_sum /= blob.count();
          diff_sum /= blob.count();
          LOG(INFO) << std::left << std::setw(max_len + 1) << std::setfill(' ')
                    << layer.layer_param().name()
                    << " blob" << b << ": " << std::scientific
                    << data_sum << " [" << diff_sum << "]";
        }
      }
      // ---- end of my additions ----
    }
    // ... the remainder of Step() is unchanged from the stock solver.cpp ...
  }
}
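At every display interval the solver now prints, in addition to the usual loss lines, one line per learnable blob: the layer name (padded to the longest name), the blob index within the layer (for convolution and inner-product layers, blob0 is the weights and blob1 is the biases), the mean absolute parameter value, and in square brackets the mean absolute gradient. The numbers below are made up purely for illustration; only the line format follows from the LOG(INFO) statement above, and each real line will additionally carry the usual glog prefix:

conv1 blob0: 1.234567e-02 [5.678901e-05]
conv1 blob1: 3.456789e-03 [1.234567e-05]
ip1   blob0: 6.789012e-03 [7.890123e-06]

Remember to rebuild Caffe (e.g. make -j8, or your CMake equivalent) for the change to take effect. Comparing the bracketed mean |diff| of a blob against its mean |data| gives a quick feel for how fast each layer is actually learning.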

