问题描述
我是 MPI 的新手。我正在尝试为进程分配一个邻接矩阵,以便我能够实现由 https://ieeexplore.ieee.org/document/1559977 给出的一维 BFS 算法。假设我有一个 6*6 的矩阵,有 4 个进程:
0 0 0 0 0 0
1 1 1 1 1 1
0 1 0 1 0 1
1 0 1 0 1 0
1 1 1 1 1 1
0 1 0 1 0 1
我希望它的处理方式如下:
A A A A A A
B B B B B B
C C C C C C
D D D D D D
D D D D D D
D D D D D D
每个进程分配 (int)(no_of_vertices/size)
行;由于图的顶点数通常不能被进程数 p 整除(即无法让每个进程的工作量完全相同),
最后一个进程分配其余的 (no_of_vertices-(size-1)*local_vertices) 行。
因此,我通读了 Jonathan Dursi sending blocks of 2D array in C using MPI 的这个答案并拥有自己的代码。我把它贴在下面:
#include "mpi.h"
#include<stdio.h>
#include<stdlib.h>
#include<iostream>
#include<iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <chrono>
#include <cmath>
using namespace std;
#define MAX_QUEUE_SIZE 5
// Return the largest of the three integers a, b and c.
// Replaces the hand-rolled nested-if version with std::max over an
// initializer list (<algorithm> is already included by this file).
int Max(int a, int b, int c)
{
    return std::max({ a, b, c });
}
// Return 1 when every entry of visited[0..size-1] is non-zero
// (i.e. every vertex has been visited), otherwise 0.
// An empty range (size == 0) vacuously counts as all-visited.
int areAllVisited(int visited[], int size)
{
    const bool done = std::all_of(visited, visited + size,
                                  [](int v) { return v != 0; });
    return done ? 1 : 0;
}
// Allocate an n x m matrix of ints backed by ONE contiguous buffer,
// plus a table of row pointers into that buffer, so the whole matrix
// can be addressed both as array[i][j] and as a flat block starting at
// &(*array)[0][0] (which is what MPI collective calls require).
// Returns 0 on success, -1 if either allocation fails (nothing leaked).
int malloc2dint(int*** array, int n, int m) {
    // one flat block holding all n*m elements
    int* data = (int*)malloc(n * m * sizeof(int));
    if (data == NULL)
        return -1;
    // one pointer per row
    int** rows = (int**)malloc(n * sizeof(int*));
    if (rows == NULL) {
        free(data);
        return -1;
    }
    // point each row at its slice of the flat block
    for (int r = 0; r < n; r++)
        rows[r] = data + r * m;
    *array = rows;
    return 0;
}
// Release a matrix created by malloc2dint.
// Row 0 points at the start of the single contiguous data block, so
// freeing it releases every element; the row-pointer table is freed
// separately. Always returns 0.
int free2dint(int*** array) {
    int** rows = *array;
    free(rows[0]);   // contiguous element storage
    free(rows);      // row-pointer table
    return 0;
}
int main(int argc,char* argv[])
{
//Variables and Initializations
int size,rank;
int local_vertices;
int** adj_matrix=NULL;
int *adjacency_matrix;
int adjacency_queue[MAX_QUEUE_SIZE];
int source_vertex;
int no_of_vertices;
int ** local_adj_matrix;
int *visited;
int *distance;
int* frontier_vertex;
//MPI Code
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&size); // MPI_COMM -> communicator,size=number of processes
// Get my rank in the communicator
MPI_Comm_rank(MPI_COMM_WORLD,&rank);//rank= current processes
//read input file,transform into adjacency matrix
if (rank == 0)
{
string filename="out.txt";
std::fstream in(filename.c_str());
cout << "reading file:" << filename << endl;
string s;
size_t n = 0,m = 0;
string data1,data2;
while (true)
{
std::getline(in,s);
istringstream is(s);
is >> data1 >> data2;
int d1 = stoi(data1);
int d2 = stoi(data2);
n = Max(n,d2,d1);
m += 1;
if (in.eof()) { break; }
}
//this block will count the number of lines and calculate the maximun number appeared in the file,which are the parameters n,m(vertice,edge)
in.clear();
in.seekg(0,ios::beg);
n += 1;
m -= 1;
//initialize the 2D array
malloc2dint(&adj_matrix,n,n);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++)
{
adj_matrix[i][j] = 0;
}
}
for (size_t i = 0; i < m; i++)
{
int x,y;
std::getline(in,s);
istringstream is(s);
is >> data1 >> data2;
x = stoi(data1);
y = stoi(data2);
adj_matrix[x][y] = 1;
}
in.close();
//print the matrix
cout << "the adjacency matrix is:" << endl;
for (int i = 0; i < n; i++) {
cout << endl;
for (int j = 0; j < n; j++) {
cout << adj_matrix[i][j] << " ";
}
}
source_vertex = 0;
no_of_vertices = n;
local_vertices = (int)(no_of_vertices/size);
}
MPI_Bcast(&local_vertices,1,MPI_INT,MPI_COMM_WORLD);
MPI_Bcast(&no_of_vertices,MPI_COMM_WORLD);
MPI_Bcast(&source_vertex,MPI_COMM_WORLD);
/* create a datatype to describe the subarrays of the global array */
int sizes[2] = { no_of_vertices,no_of_vertices }; /* global size */
int subsizes[2] = { 1,no_of_vertices }; /* local size */
int starts[2] = { 0,0 }; /* where this one starts */
MPI_Datatype type,subarrtype;
MPI_Type_create_subarray(2,sizes,subsizes,starts,MPI_ORDER_C,&type);
MPI_Type_create_resized(type,no_of_vertices * sizeof(int),&subarrtype);// /local_vertices
MPI_Type_commit(&subarrtype);
if (rank == size - 1) {
local_vertices = no_of_vertices - (size - 1) * local_vertices;
malloc2dint(&local_adj_matrix,local_vertices,no_of_vertices); }
malloc2dint(&local_adj_matrix,no_of_vertices);
//MPI_Barrier(MPI_COMM_WORLD);
int* adjptr = NULL;
if (rank == 0) adjptr = &(adj_matrix[0][0]);
int *sendcounts=new int[size];
int *displs = new int[size];
if (rank == 0) {
for (int i = 0; i < size; i++)
if (i == size - 1) {
sendcounts[i] = no_of_vertices - (size - 1) * local_vertices;
}
else
sendcounts[i] = local_vertices;
int disp = 0;
for (int i = 0; i < size; i++) {
displs[i] = disp;
disp += no_of_vertices*local_vertices;
}
}
//Scattering each row of the adjacency matrix to each of the processes
//int MPI_Scatter(const void* sendbuf,int sendcount,MPI_Datatype sendtype,void* recvbuf,int recvcount,MPI_Datatype recvtype,int root,MPI_Comm comm)
MPI_Scatterv(adjptr,sendcounts,displs,subarrtype,&local_adj_matrix[0][0],local_vertices*no_of_vertices,MPI_COMM_WORLD);
cout << "rank=" << rank << endl;
for(int i=0;i<local_vertices;i++){
cout << endl;
for (int j = 0; j < no_of_vertices; j++) {
cout << local_adj_matrix[i][j] << " ";
}
}
MPI_Barrier(MPI_COMM_WORLD);
//End of BFS code
MPI_Finalize();
return 0;
}
然而,那篇参考答案中的情况和我的不一样,很自然地,我得到了意想不到的输出:
reading file:out.txt
the adjacency matrix is:
0 0 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 1 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0 0 0
rank=1
0 0 0 0 0 0 0 1 0 0 0 0 rank=0
rank=2
rank=4
rank=3
rank=5
rank=6
rank=7
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
-410582944 556 -410582944 556 -410353664 556 815104 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 -912406441 -1879044864
0 0 0 0 0 0 -906966900 -1879046144 -410511728 556 -410398928 556
0 0 0 0 0 0 0 0 0 0 0 0
0 0 1 1 0 0 0 0 0 0 0 0
-33686019 2949222 32575338 37898 -410536560 556 -410396256 556 3473509 6357044 8192057 0
0 0 0 0 0 0 0 0 0 0 0 0
C:\Program Files\Microsoft MPI\Bin\mpiexec.exe (process 3628) exited with code 0.
Press any key to close this window . . .
我确信在初始化两个数组 sendcounts
和 displs
以及创建 newtype subarrtype
时存在巨大错误,但我不知道如何修复它们。有人帮我吗?
可能的输入文件:
0 3
0 2
2 3
3 1
5 11
5 4
5 1
6 10
8 5
9 4
10 4
11 7
解决方法
由于您已经定义了一个子数组数据类型来描述要发送的本地数据,发送计数应以该子数组为单位来表示(每个子数组就是一行)。接收计数则是消息中整数的个数(您目前正是这样设置的),但发送端的单位是子数组。
同样地,既然 sendcounts 以数据类型(datatype)为单位,位移 displs 也必须以调整(resize)后的类型范围为单位,而不是以整数个数为单位。如果您把第 186 行:
disp += no_of_vertices*local_vertices;
成为:
disp += local_vertices;
那么我认为它有效。
当您散布完整的连续行时,您实际上不需要调整大小的数据类型的所有复杂性。