如何并行化一个顺序执行只需 0.005 秒就能结束的程序?

问题描述

在一个项目中,我需要并行化一个数独求解器,它的顺序版本平均在 0.005 秒内就能结束。为了便于测试,我还统计了程序为解出数独所执行的“计算”次数。问题是:当多线程版本执行的“计算”次数比顺序版本少得多时,它的完成时间确实比顺序版本短;但是,如果顺序版本需要例如 2000 次计算才能解出数独,而使用 2 个线程的多线程版本总共也执行同样的 2000 次计算(理论上每个线程 1000 次,因此耗时应当少于顺序版本),实际耗时却并不比顺序版本少。原因可能是两个线程之间的管理和同步开销比 1000 次计算本身还要重(整个程序顺序执行时从启动到结束只需 0.005 秒,1000 次计算所需的时间非常短)。现在的问题是:我必须把它并行化,而且只能使用 C++ 标准库。怎样才能做到至少在线程数固定且较少的情况下(我并不指望它在 64-128 个线程时也有效),多线程版本总是比顺序版本更快(完成时间更短)?

目前,该程序基于树结构,其中每个节点都是一个可能的解;程序用一个队列作为任务池,每个线程从中取出任务并进行计算,如果得到的结果还不是最终解,就把新产生的结果推回队列,然后继续处理其他任务。代码如下(编译时请加上 -pthread,运行方式为 ./prog num_of_threads 0 0 0,后面的三个零目前并不重要):

!唯一相关的代码是最后一部分,即 getWork()、pushWork()、solveSudoku() 和 infiniteLoop();其他函数只是为了让示例可以运行,对分析没有帮助。

#include <math.h>

#include <array>
#include <atomic>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <list>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

// Cell value that marks an empty (not yet filled) position in a grid.
#define UNASSIGNED 0
// Side length of the board; every grid is stored as a flat array of N*N cells.
#define N 9
// Sentinel returned by findCell() when no cell equals UNASSIGNED.
#define ERROR_PAIR std::make_pair(-1,-1)

using namespace std;

// Set to true by solveSudoku() once a solved grid has been printed; polled by the workers in infiniteLoop().
atomic<bool> solutionFound{false};
// Locked by getWork() and pushWork() around every access they make to queueWork.
mutex mtx;
// Incremented by the workers in infiniteLoop(); main() prints it divided by the number of workers.
atomic<int> worksDone{0};


//Each node has a sudoku grid and some sub-trees
struct Node {
    array<unsigned  char,N*N> grid;
    vector<Node *> child;
};


// Heap-allocate a Node that holds a copy of newGrid and has no children.
// The raw pointer is handed to the caller, who is expected to delete it.
Node *newNode(const array<unsigned  char,N*N> &newGrid) {
    return new Node{newGrid, {}};
}

// Shared pool of pending work, initially empty. Each entry is one batch of nodes;
// getWork() and pushWork() lock mtx before touching it.
list<vector<Node *>> queueWork;

// Print the board to stdout, one text line per row, with "|" between the
// 3-column groups and a dashed line between the 3-row groups.
// grid: flat N*N cell array addressed as row + col * N; cells are printed as integers.
void printGrid(const array<unsigned  char,N*N> &grid) {
    for (int row = 0; row < N; row++) {
        if (row == 3 || row == 6) {
            cout << "---------------------" << endl;
        }
        // The loop increment used to read `coL++`, an undeclared identifier.
        for (int col = 0; col < N; col++) {
            if (col == 3 || col == 6) {
                cout << "| ";
            }
            cout << (int)grid[row+col*N] << " ";
        }
        cout << endl;
    }
}

//Check if a number can be inserted in a given position.
// val:  candidate value to place.
// row_: row of the target cell; col_: column of the target cell.
// grid: flat N*N cell array addressed as row + col * N.
// Returns true when val does not already occur in the same column, the same
// row, or the 3x3 box that contains (row_, col_). The target cell itself is
// not checked for being empty; callers do that where they need it.
bool canInsert(const int &val,const int &row_,const int &col_,const array<unsigned  char,N*N> &grid) {
    //Check column
    for (int row = 0; row < N; row++) {
        if (grid[row+col_*N] == val) return false;
    }
    //Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_+col*N] == val) return false;
    }
    //Check Box 3x3: start from the box's top-left corner so only its nine
    //cells are visited instead of filtering all N*N cells.
    const int boxRow = row_ - row_ % 3;
    const int boxCol = col_ - col_ % 3;
    for (int row = boxRow; row < boxRow + 3; row++) {
        for (int col = boxCol; col < boxCol + 3; col++) {
            if (grid[row+col*N] == val) return false;
        }
    }
    return true;
}

// Fill `grid` with n randomly placed values.
// seed: passed to srand() so a given seed reproduces the same grid.
// n:    how many cells to fill.
// grid: modified in place; a value is written only into a cell that is still
//       UNASSIGNED and only when canInsert() accepts it.
// The loop keeps drawing positions until n placements have succeeded.
void generateMatrix(const int &seed,const int &n,array<unsigned  char,N*N> &grid) {
    srand(seed);
    for (int placed = 0; placed < n; ) {
        // Three separate statements keep the rand() call order fixed: row, column, value.
        const int r = rand() % 9;
        const int c = rand() % 9;
        const int candidate = rand() % 9 + 1;

        unsigned char &cell = grid[r + c * N];
        if (cell != UNASSIGNED) continue;
        if (!canInsert(candidate, r, c, grid)) continue;

        cell = candidate;
        ++placed;
    }
}

//Check if the sudoku is solved
bool isSolution(const array<unsigned char,N*N> &grid)  {
    char row_[N][N+1] = {0};
    char column_[N][N+1] = {0};
    char Box[3][3][N+1] = {0};
    
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; coL++) {
            //Mark the element in row column and Box
            row_[row][grid[row+col*N]] += 1;
            column_[col][grid[row+col*N]] += 1;
            Box[row / 3][col / 3][grid[row+col*N]] += 1;
            //If an element is already present
            if (Box[row / 3][col / 3][grid[row+col*N]] > 1 ||
                column_[col][grid[row+col*N]] > 1 ||
                row_[row][grid[row+col*N]] > 1)
                return false;
        }
    }
    return true;
}

// Locate the first UNASSIGNED cell, scanning row by row and, within a row,
// column by column. Returns it as (row, column), or ERROR_PAIR when the grid
// has no empty cell.
pair<int,int> findCell(const array<unsigned  char,N*N> &grid) {
    for (int k = 0; k < N * N; ++k) {
        const int r = k / N;
        const int c = k % N;
        if (grid[r + c * N] == UNASSIGNED) {
            return {r, c};
        }
    }
    return ERROR_PAIR;
}

//Find all possible numbers that can be inserted at (row, col) and build, for
//each of them, a copy of the grid with that number written into the cell.
// row, col: position of the cell to fill.
// grid:     the current board; it is only read.
// Returns one grid per value accepted by canInsert(), in increasing value
// order; the result is empty when no value fits.
vector<array<unsigned char,N*N>> getChoices(const int &row,const int &col,const array<unsigned char,N*N> &grid) {
    vector<array<unsigned char,N*N>> choices;
    for (int i = 1; i <= N; i++) {
        if (canInsert(i,row,col,grid)) {
            // Copy the board straight into the result, then patch the one cell.
            choices.push_back(grid);
            choices.back()[row+col*N] = i;
        }
    }
    return choices;
}

// Turn every grid in `choices` into a child of `node`.
// Children are appended starting from the last choice, and `choices` is left empty.
void addChoices(vector<array<unsigned char,N*N>> &choices,Node &node) {
    for (auto it = choices.rbegin(); it != choices.rend(); ++it) {
        node.child.push_back(newNode(*it));
    }
    choices.clear();
}

//Get a work from the queue.
// chunkSize: how many queued batches to merge into one result. A value below
//            1 is treated as 1, so a caller is never handed an empty result
//            while work is still queued.
// Returns the nodes of the batches taken from the back of queueWork, or an
// empty vector when the queue is empty. When fewer than chunkSize batches are
// queued, a single batch is taken.
// Holds mtx for the whole call.
vector<Node *> getWork(const int chunkSize) {
    lock_guard<mutex> lck(mtx);

    if (queueWork.empty()) {
        return {};
    }

    const size_t wanted = chunkSize > 0 ? static_cast<size_t>(chunkSize) : 1;
    const size_t toTake = queueWork.size() >= wanted ? wanted : 1;

    // Move the first batch out instead of copying it: this runs while the
    // lock is held, so every avoided copy shortens the critical section.
    vector<Node *> result = std::move(queueWork.back());
    queueWork.pop_back();

    for (size_t taken = 1; taken < toTake; ++taken) {
        const vector<Node *> &batch = queueWork.back();
        result.insert(result.end(), batch.begin(), batch.end());
        queueWork.pop_back();
    }
    return result;
}

// Append a copy of `work` to the back of queueWork as one batch.
// mtx is held while the queue is modified; `work` itself is left untouched.
void pushWork(vector<Node *> &work) {
    const lock_guard<mutex> guard(mtx);
    queueWork.emplace_back(work);
}


//Compute one step of computation for each node in input, and put all the
//children in the task queue.
// nodes: batch of nodes to expand. Every node that is processed is deleted
//        and its slot in `nodes` is set to nullptr, so the caller is not left
//        with dangling pointers.
// For a node that still has an empty cell, one child per admissible value is
// created and the children are pushed to the queue as a single batch.
// For a full grid that isSolution() accepts, the first thread to get here
// prints it and the function returns; the remaining nodes of the batch are
// freed first.
void solveSudoku(vector<Node *> &nodes) {
    for (Node *&current : nodes) {
        if (current == nullptr) {
            continue;
        }

        const pair<int,int> freeCell = findCell(current->grid);
        if (freeCell != ERROR_PAIR) {
            vector<array<unsigned char,N*N>> choices = getChoices(freeCell.first,freeCell.second,current->grid);
            if (!choices.empty()) {
                addChoices(choices,*current);
                // pushWork() stores its own copy of the pointer list, so the
                // parent can be deleted right after.
                pushWork(current->child);
            }
        } else if (isSolution(current->grid) && !solutionFound.exchange(true)) {
            // exchange() makes test-and-set a single atomic step: exactly one
            // thread sees the old value `false` and reports the solution.
            printGrid(current->grid);
            cout << "That's the first solution found !" << endl;

            // Nothing else will look at this batch; slots already processed
            // are nullptr, for which delete is a no-op.
            for (Node *&leftover : nodes) {
                delete leftover;
                leftover = nullptr;
            }
            return;
        }

        delete current;
        current = nullptr;
    }
}

void infiniteLoop(const int chunkSize){
    while(!solutionFound){
        if(!queueWork.empty()){
            auto part = getWork(chunkSize);
             
            if(!part.empty()) {
                solveSudoku(part);
            } 
            worksDone++;    
        }
    } 
}



// Entry point.
// Usage: prog nw seed initial_values
//   nw             number of worker threads (must be at least 1)
//   seed           seed for generateMatrix()
//   initial_values number of cells generateMatrix() should pre-fill
// When seed or initial_values is 0 the hard-coded grid below is used instead.
// Expands the root node once, runs nw workers until they return, then prints
// whether a solution was found together with the elapsed wall-clock time.
// Returns 0 on success and -1 on a usage error.
int main(int argc,char *argv[]) {
    if (argc < 4) {
        std::cerr << "use: " << argv[0]  << " nw seed initial_values " << endl;
        return (-1);
    }

    chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now();

    const int nw = atoi(argv[1]); //Number of worker
    const int seed = atoi(argv[2]); //Seed for matrix generator
    const int initialValues = atoi(argv[3]); //Number of values to generate at the beginning
    const int chunkSize = 1;

    // Without a worker nothing consumes the queue, and the statistics below
    // divide by nw.
    if (nw < 1) {
        std::cerr << "nw must be at least 1" << endl;
        return (-1);
    }

    array<unsigned char,N*N> grid = {0};

    if(seed != 0 && initialValues != 0){
        generateMatrix(seed,initialValues,grid);
    } else {
        // NOTE(review): only ten values are listed, so the other cells stay 0.
        // With the row + col * N layout the first nine land in column 0 and
        // contain 9 twice. This looks like a truncated puzzle literal --
        // confirm against the intended 81-value grid.
        grid =
                                      {9,3,5,4,6,1,2,8,9,7};
    }

    // Expand the root on the main thread so the queue is not empty when the
    // workers start.
    Node *root = newNode(grid);
    vector<Node *> primoLavoro ;
    primoLavoro.push_back(root);
    solveSudoku(primoLavoro);

    vector<thread> tids;
    tids.reserve(nw);
    for(int i=0;i<nw;i++){
        tids.emplace_back(infiniteLoop,chunkSize);
    }

    cout << "tids.size()" << tids.size() << endl;
    for(thread &t : tids){
        t.join();
    }

    if(!solutionFound) cout << "No solution found ! " << endl;

    chrono::high_resolution_clock::time_point t2 = chrono::high_resolution_clock::now();
    chrono::duration<double> time_span2 = chrono::duration_cast<chrono::duration<double>>(t2 - t1);

    cout << "WorksDone = " << worksDone / nw << endl;
    cout << "Tempo vecchio " << time_span2.count() << " seconds with " << nw << " threads !" << endl;
    return(0);
}

解决方法

暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)