问题描述
我正在尝试使用 perf 分析给定进程中的多个线程。然而从下面的代码来看,即使 perf_event_open 的 pid 参数为 0
(这应该会对整个进程进行分析?),硬件计数器的值也只对应于调用 perf_event_open 的那个线程
(而不是进程内各线程计数器之和),也就是说 inf_loop 中的工作没有被计算在内。
我的问题是:如何分析进程中的所有线程,而不是像下面这样只分析调用 perf_event_open 的线程?
perf_event_attr 中是否还有其他配置需要设置,才能启用进程范围的分析?
/**
 * Invokes the perf_event_open system call through syscall().
 *
 * @param hw_event  Attribute structure describing the event to open.
 * @param pid       Forwarded unchanged as the syscall's pid argument.
 * @param cpu       Forwarded unchanged as the syscall's cpu argument.
 * @param group_fd  Forwarded unchanged as the syscall's group_fd argument.
 * @param flags     Forwarded unchanged as the syscall's flags argument.
 * @return The syscall result, narrowed to int and widened back to long.
 */
static long perf_event_open(struct perf_event_attr *hw_event,pid_t pid,int cpu,int group_fd,unsigned long flags)
{
    // The raw result is deliberately held in an int before being returned.
    const int status = static_cast<int>(
        syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags));
    return status;
}
// File descriptor of the perf event opened by setup(); read back by read().
static int fd;

/**
 * Opens, resets and enables a user-space-only hardware instruction counter
 * and stores its file descriptor in the file-scope `fd`.
 *
 * The event is opened with pid = 0 and cpu = -1. Any failure to open, reset
 * or enable the counter is reported on stderr and terminates the process
 * with EXIT_FAILURE.
 */
void setup()
{
    struct perf_event_attr pe;
    // Zero the whole structure so every field not set below is 0.
    memset(&pe, 0, sizeof(pe));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;       // start disabled; enabled explicitly below
    pe.exclude_kernel = 1;
    pe.exclude_hv = 1;
    // NOTE: pe.inherit stays 0 here. Per the accompanying answer, the inherit
    // flag is what folds child-thread counts into this counter; without it
    // only the thread that opens the event is counted.

    const pid_t pid = 0;
    const int cpu = -1;
    const int group_fd = -1;       // no event group
    const unsigned long flags = 0;

    fd = static_cast<int>(perf_event_open(&pe, pid, cpu, group_fd, flags));
    if (fd == -1) {
        fprintf(stderr, "Error opening leader %llx\n",
                static_cast<unsigned long long>(pe.config));
        exit(EXIT_FAILURE);
    }

    // A counter that failed to reset or enable would yield meaningless
    // numbers, so treat those failures like a failed open.
    if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
        perror("ioctl(PERF_EVENT_IOC_RESET)");
        exit(EXIT_FAILURE);
    }
    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        perror("ioctl(PERF_EVENT_IOC_ENABLE)");
        exit(EXIT_FAILURE);
    }
}
/**
 * Reads the current value of the perf counter behind the file-scope `fd`.
 *
 * @return The 64-bit counter value, or -1 if the underlying read fails or
 *         returns fewer bytes than a full counter value.
 */
int64_t read()
{
    int64_t count = 0;
    // ::read with three arguments selects the POSIX overload, not this function.
    const ssize_t got = ::read(fd, &count, sizeof(count));
    if (got != static_cast<ssize_t>(sizeof(count))) {
        fprintf(stderr, "Error reading perf counter (read returned %lld)\n",
                static_cast<long long>(got));
        return -1;
    }
    return count;
}
// File-scope counter that inf_loop() keeps incrementing.
std::size_t k;
// Busy loop used as the body of the extra thread started in main().
// As described in the question, the instruction counts printed by main()
// do not include the work done in this function.
void inf_loop()
{
k = 2;
// The loop condition reads k through a volatile pointer -- presumably so the
// compiler cannot fold the test away and must reload k on every iteration.
volatile size_t *p = &k;
// Runs for as long as the volatile read of k is non-zero; k starts at 2 and
// is only ever incremented here.
while (*p)
{
++k;
}
}
int main(int argc,char **argv)
{
setup();
thread t1(inf_loop);
int count = 0;
for (uint64_t idx = 0; idx < (1ULL << 54); ++idx)
{
if (idx % (1ULL << 32) == 0)
{
cout << "Cycles: " << read() << endl;
}
}
}
解决方法
您想要的是 perf_event_attr 的 inherit 标志。
#include <iostream>
#include <type_traits>
/**
 * CRTP base that gives `Derived` a single, lazily constructed instance.
 *
 * Usage: `class Foo : public Singleton<Foo> { ... };` then
 * `Foo::getInstance()`.
 *
 * The base is non-copyable so that `auto s = Foo::getInstance();` cannot
 * silently create a second instance; bind the result to a reference instead.
 */
template <class Derived>
class Singleton
{
public:
    Singleton(const Singleton&) = delete;
    Singleton& operator=(const Singleton&) = delete;

    // The class has a virtual function, so give it a virtual destructor too.
    virtual ~Singleton() = default;

    /**
     * Returns the one instance of `Derived`, constructing it on first call.
     *
     * @return Reference to the function-local static `Derived` object.
     */
    static Derived& getInstance()
    {
        // Assert that the template arg really is derived from the appropriate
        // instantiation of the base class template.
        static_assert(std::is_base_of<Singleton<Derived>, Derived>::value,
                      "Derived must inherit from Singleton<Derived>");
        static Derived instance;
        return instance;
    }

    /// Default behaviour; derived classes may override it.
    virtual void func()
    {
        std::cout << "from parent" << std::endl;
    }

protected:
    // Only derived classes construct the base.
    Singleton() = default;
};
class Child : public Singleton<Child>
{
public:
void func() override
{
std::cout << "from child" << std::endl;
}
};
int main()
{
auto& s = Child::getInstance(); // s is a Child here
s.func(); // Outputs "from child" as expected
}
perf_event_attr 的 inherit 标志(在调用 perf_event_open 之前设置 pe.inherit = 1)会将子线程的计数合并到父线程中;
它只对计数器打开之后创建的子线程生效。