问题描述
在一个项目中，我需要并行化一个数独求解器，它的顺序版本平均在 0.005 秒内结束。为了测试，我还统计了程序求解数独时执行的“计算”次数。问题在于：在多线程下运行时，只有当多线程版本执行的“计算”次数远少于顺序版本时，它的完成时间才会比顺序版本短；但如果顺序版本需要例如 2000 次计算才能解出数独，而使用 2 个线程的多线程版本总共也执行同样的 2000 次计算（理论上每个线程 1000 次），其耗时并不比顺序版本少，因为两个线程之间的管理和同步开销很可能比 1000 次计算本身还重（如果整个程序顺序执行从启动到结束只需 0.005 秒，那么 1000 次计算所需的时间就非常少）。现在的问题是，我必须把它并行化，而且只能使用 C++ 标准库。我该怎么做，才能至少在线程数固定的情况下（我并不指望在 64-128 个线程时也有效），让多线程版本总是比顺序版本更快（完成时间更短）？
目前，该程序基于树结构，其中每个节点都是一个可能的（部分）解，并使用一个队列作为任务池：每个线程从中取出工作、进行计算，如果结果不是最终解，就把得到的子节点推回队列，然后继续取其他工作。代码如下（可以使用 -pthread 编译，并用 ./prog num_of_threads 0 0 0 运行；后面三个零目前并不重要）：
注意：唯一相关的代码是最后一部分，即 getWork()、pushWork()、solveSudoku()、infiniteLoop()；其他函数只是为了让示例可以运行，对分析没有帮助。
#include <math.h>
#include <array>
#include <atomic>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <list>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
// Value stored in a grid cell that has not been filled in yet.
#define UNASSIGNED 0
// Side length of the sudoku board; a grid holds N*N cells.
#define N 9
// Sentinel pair returned by findCell() when the grid has no empty cell.
#define ERROR_PAIR std::make_pair(-1,-1)
using namespace std;
// Set to true by solveSudoku() when a solution is found; polled by infiniteLoop().
atomic<bool> solutionFound{false};
// Locked by getWork() and pushWork() around their accesses to queueWork.
mutex mtx;
// Incremented by the worker loop in infiniteLoop(); reported by main().
atomic<int> worksDone{0};
//Each node has a sudoku grid and some sub-trees
struct Node {
array<unsigned char,N*N> grid;
vector<Node *> child;
};
// Allocates a node on the heap whose grid is a copy of newGrid and whose
// child list is empty. The returned pointer must eventually be released
// with delete.
Node *newNode(const std::array<unsigned char, N * N> &newGrid) {
    return new Node{newGrid, {}};
}
list<vector<Node *>> queueWork(0,vector<Node *>(0));
// Prints the board to stdout, one board row per line, with "| " separators
// before columns 3 and 6 and a dashed rule before rows 3 and 6.
// Cell (row, col) is read from grid[row + col * N].
void printGrid(const array<unsigned char,N*N> &grid) {
    for (int row = 0; row < N; row++) {
        if (row == 3 || row == 6) {
            cout << "---------------------" << '\n';
        }
        for (int col = 0; col < N; col++) {
            if (col == 3 || col == 6) {
                cout << "| ";
            }
            // Cast so the cell is printed as a number, not as a character.
            cout << static_cast<int>(grid[row + col * N]) << " ";
        }
        cout << '\n';
    }
    // Flush once at the end instead of after every line.
    cout.flush();
}
// Checks whether val can be placed at (row_, col_) without clashing with a
// value already present in the same column, row or 3x3 box.
//
// val   - candidate value
// row_  - row of the target cell (0 .. N-1)
// col_  - column of the target cell (0 .. N-1)
// grid  - board; cell (row, col) is stored at grid[row + col * N]
//
// Returns false as soon as val is found in the row, column or box of the
// target cell (including the target cell itself), true otherwise.
bool canInsert(const int &val,const int &row_,const int &col_,const array<unsigned char,N*N> &grid) {
    // Same column: vary the row.
    for (int row = 0; row < N; row++) {
        if (grid[row + col_ * N] == val) return false;
    }
    // Same row: vary the column.
    for (int col = 0; col < N; col++) {
        if (grid[row_ + col * N] == val) return false;
    }
    // Same 3x3 box: visit only the nine cells of the box that contains the
    // target cell instead of scanning the whole board.
    const int boxRow = (row_ / 3) * 3;
    const int boxCol = (col_ / 3) * 3;
    for (int row = boxRow; row < boxRow + 3; row++) {
        for (int col = boxCol; col < boxCol + 3; col++) {
            if (grid[row + col * N] == val) return false;
        }
    }
    return true;
}
// Seeds rand() with seed, then keeps drawing a random (row, column, value)
// triple until n values have been written into grid. A draw is accepted only
// when the chosen cell is still UNASSIGNED and canInsert() allows the value.
// NOTE(review): the loop ends only once n placements have succeeded; nothing
// here bounds the number of attempts.
void generateMatrix(const int &seed,const int &n,array<unsigned char,N*N> &grid) {
    srand(seed);
    for (int placed = 0; placed < n; ) {
        // The three draws stay in this order so a given seed yields the same grid.
        const int r = rand() % 9;
        const int c = rand() % 9;
        const int candidate = rand() % 9 + 1;
        const bool cellFree = (grid[r + c * N] == UNASSIGNED);
        if (!cellFree || !canInsert(candidate, r, c, grid)) {
            continue;
        }
        grid[r + c * N] = candidate;
        ++placed;
    }
}
//Check if the sudoku is solved
bool isSolution(const array<unsigned char,N*N> &grid) {
char row_[N][N+1] = {0};
char column_[N][N+1] = {0};
char Box[3][3][N+1] = {0};
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; coL++) {
//Mark the element in row column and Box
row_[row][grid[row+col*N]] += 1;
column_[col][grid[row+col*N]] += 1;
Box[row / 3][col / 3][grid[row+col*N]] += 1;
//If an element is already present
if (Box[row / 3][col / 3][grid[row+col*N]] > 1 ||
column_[col][grid[row+col*N]] > 1 ||
row_[row][grid[row+col*N]] > 1)
return false;
}
}
return true;
}
// Scans the board row by row (columns left to right within a row) and returns
// the (row, column) of the first UNASSIGNED cell. Returns ERROR_PAIR when
// every cell is filled.
pair<int,int> findCell(const array<unsigned char,N*N> &grid) {
    for (int row = 0; row < N; ++row) {
        for (int col = 0; col < N; ++col) {
            if (grid[row + col * N] != UNASSIGNED) {
                continue;
            }
            return {row, col};
        }
    }
    return ERROR_PAIR;
}
// Builds one candidate grid for every value that canInsert() accepts at
// (row, col).
//
// row, col - coordinates of the cell to fill
// grid     - current board; it is not modified
//
// Returns the candidate grids in ascending order of the inserted value; each
// is a copy of grid with that value written at (row, col). The result is
// empty when no value fits.
vector<array<unsigned char,N*N>> getChoices(const int &row,const int &col,const array<unsigned char,N*N> &grid) {
    vector<array<unsigned char,N*N>> choices;
    for (int value = 1; value <= N; value++) {
        if (canInsert(value, row, col, grid)) {
            // Copy the board straight into the vector, then patch the cell.
            choices.push_back(grid);
            choices.back()[row + col * N] = static_cast<unsigned char>(value);
        }
    }
    return choices;
}
// Turns every grid in choices into a new child of node, walking choices from
// its last element to its first, and leaves choices empty.
void addChoices(vector<array<unsigned char,N*N>> &choices,Node &node) {
    for (auto it = choices.rbegin(); it != choices.rend(); ++it) {
        node.child.push_back(newNode(*it));
    }
    choices.clear();
}
// Removes work from the back of queueWork while holding mtx.
//
// chunkSize - number of batches to take at once; values below 1 are treated
//             as 1 so that a call on a non-empty queue always removes work.
//
// Returns an empty vector when the queue is empty. When the queue holds at
// least chunkSize batches, that many batches are removed and concatenated
// (most recently pushed first); otherwise only the last batch is removed
// and returned.
vector<Node *> getWork(const int chunkSize) {
    lock_guard<mutex> lck(mtx);
    if (queueWork.empty()) {
        return {};
    }
    const size_t wanted = chunkSize < 1 ? 1 : static_cast<size_t>(chunkSize);
    const size_t batches = queueWork.size() >= wanted ? wanted : 1;

    // Steal the first batch's storage instead of copying it.
    vector<Node *> result = std::move(queueWork.back());
    queueWork.pop_back();
    for (size_t taken = 1; taken < batches; ++taken) {
        const vector<Node *> &batch = queueWork.back();
        result.insert(result.end(), batch.begin(), batch.end());
        queueWork.pop_back();
    }
    return result;
}
// Appends a copy of work as a new batch at the back of queueWork, holding mtx
// for the duration of the insertion. work itself is left untouched.
void pushWork(vector<Node *> &work) {
    const lock_guard<mutex> guard(mtx);
    queueWork.emplace_back(work);
}
// Performs one expansion step on every node in nodes and consumes them.
//
// For a node that still has an empty cell, one child is created per value
// that fits the first empty cell, and the children are pushed to the work
// queue as a single batch (nothing is pushed when no value fits). For a node
// without empty cells, the grid is printed if it is a valid solution and no
// solution has been reported yet; processing of the remaining nodes then
// stops.
//
// Every node passed in is deleted, and nodes is empty on return, so the
// caller is never left holding dangling pointers.
void solveSudoku(vector<Node *> &nodes) {
    bool stop = false;
    for (Node *current : nodes) {
        if (!stop) {
            const pair<int,int> freeCell = findCell(current->grid);
            if (freeCell != ERROR_PAIR) {
                vector<array<unsigned char,N*N>> choices =
                    getChoices(freeCell.first, freeCell.second, current->grid);
                if (!choices.empty()) {
                    addChoices(choices, *current);
                    // pushWork copies the pointer list, so the queue keeps the
                    // children alive after current is deleted below.
                    pushWork(current->child);
                }
            } else if (isSolution(current->grid) && !solutionFound.exchange(true)) {
                // exchange() makes "test and set" a single atomic step, so
                // exactly one thread can win and print.
                printGrid(current->grid);
                cout << "That's the first solution found !" << endl;
                stop = true;
            }
        }
        delete current;
    }
    nodes.clear();
}
void infiniteLoop(const int chunkSize){
while(!solutionFound){
if(!queueWork.empty()){
auto part = getWork(chunkSize);
if(!part.empty()) {
solveSudoku(part);
}
worksDone++;
}
}
}
int main(int argc,char *argv[]) {
if (argc < 4) {
std::cerr << "use: " << argv[0] << " nw seed initial_values " << endl;
return (-1);
}
chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::Now();
const int nw = atoi(argv[1]); //Number of worker
const int seed = atoi(argv[2]); //Seed for matrix generator
const int initialValues = atoi(argv[3]); //Number of values to generate at the beginning
const int chunkSize = 1;
array<unsigned char,N*N> grid = {0};
vector<thread> tids;
if(seed != 0 && initialValues != 0){
generateMatrix(seed,initialValues,grid);
} else {
grid =
{9,3,5,4,6,1,2,8,9,7};
}
Node *root = newNode(grid);
vector<Node *> primoLavoro ;
primoLavoro.push_back(root);
solveSudoku(primoLavoro);
for(int i=0;i<nw;i++){
tids.push_back(thread(infiniteLoop,chunkSize));
}
cout << "tids.size()" << tids.size() << endl;
for(thread &t : tids){
t.join();
}
if(!solutionFound) cout << "No solution found ! " << endl;
chrono::high_resolution_clock::time_point t2 = chrono::high_resolution_clock::Now();
chrono::duration<double> time_span2 = chrono::duration_cast<chrono::duration<double>>(t2 - t1);
cout << "WorksDone = " << worksDone / nw << endl;
cout << "Tempo vecchio " << time_span2.count() << " seconds with " << nw << " threads !" << endl;
return(0);
}
}
解决方法
暂未找到能够解决该问题的有效方法，小编正在努力寻找和整理中！
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)