C++ - 列印堆棧跟蹤並測量函數執行時間時析構函數的執行

我想有一些簡單的檢測,列印函數調用堆棧,並測量每個函數所花費的時間。


// Feature flag for the stack-trace instrumentation.
// NOTE(review): nothing in the visible code tests this macro — the
// ADD_STACK_TRACE macros are gated on NDEBUG instead; confirm it is still needed.
#define STACK_TRACE_ENABLED 1



// One record of the call trace: a label plus the timestamps that bracket it.
struct trace_t {
    // Timestamp type shared by both ends of the measured interval.
    using time_point = std::chrono::high_resolution_clock::time_point;

    std::string func_name;  // label printed for this entry
    time_point begin;       // start of the measured interval
    time_point end;         // end of the measured interval
};



using stack_trace_t = std::vector<trace_t>;


auto stack_trace = stack_trace_t{};



/// Prints the last entry of the global `stack_trace` to std::cout: its name
/// and the elapsed time between `begin` and `end`, in nanoseconds.
///
/// Does nothing when the trace is empty, because calling back() on an empty
/// vector is undefined behavior.
void print_top_of_trace() {
    using namespace std::chrono;

    if (stack_trace.empty()) {
        return;
    }

    const auto& top = stack_trace.back();

    // Elapsed time expressed as a floating-point nanosecond count.
    const duration<double, std::nano> elapsed = top.end - top.begin;

    std::cout << "##" << std::setw(50) << top.func_name
              << " took" << std::setw(16) << elapsed.count()
              // End the record with a real newline so each entry is on its own line.
              << " nanoseconds ##\n";
}



/// Scope-bound helper that records one entry on the global `stack_trace`.
///
/// The constructor timestamps the entry and pushes it; the destructor stamps
/// the end time on the top entry, prints it with print_top_of_trace(), and
/// pops it again.
struct tracer {
    /// @param fn Label stored in the pushed trace entry.
    tracer(std::string fn)
        : begin{std::chrono::high_resolution_clock::now()}
    {
        // `begin` is taken before the push, so the cost of push_back (including
        // any vector reallocation) is part of the measured interval.
        stack_trace.push_back(trace_t{fn, begin, end});
    }

    // Each tracer accounts for exactly one stack entry; a copy would pop the
    // stack a second time on destruction, so copying is disabled.
    tracer(const tracer&) = delete;
    tracer& operator=(const tracer&) = delete;

    ~tracer() {
        // Nothing to close if the stack was emptied behind our back.
        if (stack_trace.empty()) {
            return;
        }

        stack_trace.back().end = std::chrono::high_resolution_clock::now();
        print_top_of_trace();
        stack_trace.pop_back();
    }

    std::chrono::high_resolution_clock::time_point begin;  // set at construction
    std::chrono::high_resolution_clock::time_point end;    // left default-constructed by this struct
};



和一些宏來簡化使用:


// Instrumentation macros. Place ADD_STACK_TRACE as the first statement of a
// function body. The trailing ';' is part of the expansion, so call sites
// write the macro name without a semicolon.
#ifndef NDEBUG

// Declares a local tracer labelled with `func_name`. The variable name avoids
// a double underscore, which is reserved for the implementation.
#define ADD_STACK_TRACE_(func_name) tracer ny_stack_tracer_(func_name);

// Labels the tracer with the enclosing function's signature.
#define ADD_STACK_TRACE ADD_STACK_TRACE_(__PRETTY_FUNCTION__)

#else

// With NDEBUG defined the macros expand to a no-op statement.
#define ADD_STACK_TRACE_(func_name) (void)(0);

#define ADD_STACK_TRACE ADD_STACK_TRACE_(__PRETTY_FUNCTION__)

#endif



然後我有幾個函數需要測量,只需把 ADD_STACK_TRACE 加在函數的第一行;完整的工作示例見 godbolt。


constexpr auto sin = [](float x) {


 ADD_STACK_TRACE


 return x - 


 ((x * x * x) / 6.0f) + 


 ((x * x * x * x * x) / 120.0f) -


 ((x * x * x * x * x * x * x) / 5040.0f);


};



constexpr auto cos = [](float x) {


 ADD_STACK_TRACE


 return 1.0f - 


 ((x * x) / 2.0f) + 


 ((x * x * x * x) / 24.0f) -


 ((x * x * x * x * x * x) / 720.0f);


};



float sum(float i1, float i2) {


 ADD_STACK_TRACE


 return i1 + i2;


}



float tan(float f) {


 ADD_STACK_TRACE


 return sin(f) / cos(f);


}



int main() {


 ADD_STACK_TRACE


 float param = sum(44.0f, 1.0f) * PI / 180.0f;


 return tan(param);


} 



目前,我得到以下輸出:


## float sum(float, float) took 1442 nanoseconds ##



## auto (anonymous class)::operator()(float) const took 2257 nanoseconds ##



## auto (anonymous class)::operator()(float) const took 118 nanoseconds ##



## float tan(float) took 4689 nanoseconds ##



## int main() took 44182 nanoseconds ##



我覺得有點奇怪:sin 和 cos 都使用 Taylor 展開,我看不出為什麼它們的執行時間會有這麼大的差異。實際上,如果從 tan 中移除 ADD_STACK_TRACE,我得到以下輸出:


## float sum(float, float) took 1324 nanoseconds ##



## auto (anonymous class)::operator()(float) const took 169 nanoseconds ##



## auto (anonymous class)::operator()(float) const took 124 nanoseconds ##



## int main() took 33660 nanoseconds ##



可以看到,這表明 sin 和 cos 的耗時非常接近。這裡有什麼問題?

时间:

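std::vector<T> 的 push_back 文檔說明:當新的 size() 大於 capacity() 時,會發生重新分配。

這一步的代價較高,因為它重新分配新的連續內存,並將元素移動/複製到新創建的內存中。由於 tracer 的構造函數先記錄 begin 再調用 push_back,重新分配的耗時會被計入當次函數的測量結果。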

https://en.cppreference.com/w/cpp/container/vector/push_back

要防止調整大小,請嘗試以下操作:


constexpr auto sin = [](float x) {


 ADD_STACK_TRACE


 return x - 


 ((x * x * x) / 6.0f) + 


 ((x * x * x * x * x) / 120.0f) -


 ((x * x * x * x * x * x * x) / 5040.0f);


};



constexpr auto cos = [](float x) {


 ADD_STACK_TRACE


 return 1.0f - 


 ((x * x) / 2.0f) + 


 ((x * x * x * x) / 24.0f) -


 ((x * x * x * x * x * x) / 720.0f);


};



float sum(float i1, float i2) {


 ADD_STACK_TRACE


 return i1 + i2;


}



float tan(float f) {


 ADD_STACK_TRACE


 return sin(f) / cos(f);


}



int main() {


 stack_trace.reserve( 1000 ); // A big capacity


 ADD_STACK_TRACE


 float param = sum(44.0f, 1.0f) * PI / 180.0f;


 return tan(param);


} 



...