问题描述
由于 C++17 标准库支持并行算法，我原以为它会是我们的首选；但在与 tbb 和 openmp 比较之后，我改变了主意，因为我发现标准库的并行算法要慢得多。
在这篇文章中，我想征询专业建议：我是否应该放弃标准库的并行算法，转而使用 tbb 或 openmp？谢谢！
Env:
- Mac OSX,Catalina 10.15.7
- GNU g++-10
基准代码:
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <execution>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include <tbb/parallel_for.h>
// Number of elements that every benchmark variant fills and then sums.
constexpr std::size_t N = 1'000'000;
/// Benchmark variant built on the C++17 parallel algorithms.
///
/// Splits the index range [0, N) into a few coarse chunks, fills each chunk
/// through std::for_each(std::execution::par, ...), then sums sequentially.
///
/// @return Sum of 1 / (1 + exp(-sin(i * 0.001))) for i in [0, N).
double std_for() {
    auto values = std::vector<double>(N);

    // One entry per chunk; the parallel for_each hands each chunk index to a worker.
    const std::size_t n_par = 5;
    auto indices = std::vector<std::size_t>(n_par);
    std::iota(indices.begin(), indices.end(), std::size_t{0});

    // Ceiling division so that n_par chunks always cover all N elements.
    const std::size_t stride = (N + n_par - 1) / n_par;

    std::for_each(
        std::execution::par, indices.begin(), indices.end(),
        [&](std::size_t index) {
            // Clamp both bounds to N so the last chunk never writes past the
            // end of `values` when N is not a multiple of n_par.
            const std::size_t begin = std::min(index * stride, N);
            const std::size_t end = std::min(begin + stride, N);
            for (std::size_t i = begin; i < end; ++i) {
                values[i] = 1.0 / (1 + std::exp(-std::sin(i * 0.001)));
            }
        });

    // Left-to-right sum, same order as a plain loop over `values`.
    return std::accumulate(values.begin(), values.end(), 0.0);
}
double tbb_for() {
auto values = std::vector<double>(N);
tbb::parallel_for(
tbb::blocked_range<int>(0,values.size()),[&](tbb::blocked_range<int> r) {
for (int i=r.begin(); i<r.end(); ++i) {
values[i] = 1.0 / (1 + std::exp(-std::sin(i * 0.001)));
}
});
double total = 0;
for (double value : values) {
total += value;
}
return total;
}
/// Benchmark variant built on an OpenMP `parallel for` loop.
///
/// The fill loop is distributed by OpenMP; the sum is computed sequentially.
///
/// @return Sum of 1 / (1 + exp(-sin(i * 0.001))) for i in [0, N).
double omp_for()
{
    auto values = std::vector<double>(N);

    // Hoist the bound into a signed local: the loop condition then compares
    // like-signed integers and the bound is visibly loop-invariant.
    const int count = static_cast<int>(values.size());
#pragma omp parallel for
    for (int i = 0; i < count; ++i) {
        values[i] = 1.0 / (1 + std::exp(-std::sin(i * 0.001)));
    }

    // Left-to-right sum, same order as a plain loop over `values`.
    return std::accumulate(values.begin(), values.end(), 0.0);
}
/// Sequential baseline: fills and sums the vector on the calling thread only.
///
/// @return Sum of 1 / (1 + exp(-sin(i * 0.001))) for i in [0, N).
double seq_for()
{
    auto values = std::vector<double>(N);

    // std::size_t index matches values.size(), avoiding a signed/unsigned comparison.
    for (std::size_t i = 0; i < values.size(); ++i) {
        values[i] = 1.0 / (1 + std::exp(-std::sin(i * 0.001)));
    }

    // Left-to-right sum, same order as a plain loop over `values`.
    return std::accumulate(values.begin(), values.end(), 0.0);
}
/// Calls `fn_ptr` once, measures the wall time of that call, and prints
/// "<fn_name>,rez = <result>,dur = <microseconds>" to std::cout.
///
/// @param fn_ptr   Benchmark function to run; its return value is printed.
/// @param fn_name  Label printed in front of the measurement.
void time_it(double(*fn_ptr)(),const std::string& fn_name) {
    // steady_clock is monotonic, so the measured interval cannot be skewed by
    // wall-clock adjustments while the benchmark is running.
    using clock = std::chrono::steady_clock;

    const auto t1 = clock::now();
    const auto rez = fn_ptr();
    const auto t2 = clock::now();

    const auto duration =
        std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
    std::cout << fn_name << ",rez = " << rez << ",dur = " << duration << std::endl;
}
/// Runs exactly one benchmark variant, selected by name on the command line.
///
/// Usage: <program> <std_for|omp_for|tbb_for|seq_for>
///
/// @return 0 after running a variant; 1 when the argument is missing or does
///         not name a known variant.
int main(int argc,char** argv) {
    // argv[1] only exists when an argument was actually passed.
    if (argc < 2) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "a.out")
                  << " <std_for|omp_for|tbb_for|seq_for>\n";
        return 1;
    }

    const std::string op(argv[1]);
    if (op == "std_for") {
        time_it(&std_for,op);
    } else if (op == "omp_for") {
        time_it(&omp_for,op);
    } else if (op == "tbb_for") {
        time_it(&tbb_for,op);
    } else if (op == "seq_for") {
        time_it(&seq_for,op);
    } else {
        // Report a mistyped name instead of silently doing nothing.
        std::cerr << "unknown benchmark: " << op << '\n';
        return 1;
    }
    return 0;
}
编译选项:
g++ --std=c++17 -O3 b.cpp -ltbb -I /usr/local/include -L /usr/local/lib -fopenmp
结果:
std_for,rez = 500106,dur = 11119
tbb_for,dur = 7372
omp_for,dur = 4781
seq_for,dur = 27910
我们可以看到，std_for 比 seq_for（顺序 for 循环）快，但仍然比 tbb 和 openmp 慢。
更新
正如大家在评论中建议的那样，为了公平起见，我改为每次只单独运行一个版本。上面的代码已更新，结果如下：
>>> ./a.out seq_for
seq_for,dur = 29885
>>> ./a.out tbb_for
tbb_for,dur = 10619
>>> ./a.out omp_for
omp_for,dur = 10052
>>> ./a.out std_for
std_for,dur = 12423
正如大家所说，对比之前的结果可以看出，在同一个进程中连续运行 4 个版本是不公平的。
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)