使用 MPI_Scatter 和 MPI_Type_vector 时出现意外的列分布

问题描述

我正在尝试使用 MPI 在 N 个进程之间划分 2D 矩阵的列。作为模板，我参考了问题“MPI_Scatter - sending columns of 2D array”中的示例。

我的代码：

//HEADERS
    /*
     * Allocate an n-by-m board backed by one contiguous buffer and fill it
     * with random 0/1 cells.
     *
     * All cells live in a single n*m char allocation; the returned array holds
     * n row pointers into that buffer, so b[0] is the start of the whole board
     * and b[k] == b[0] + k*m. Cells are drawn from rand() in row-major order;
     * a cell is 1 when rand() < 0.25 * RAND_MAX and 0 otherwise.
     *
     * Returns NULL if n or m is negative, if n*m overflows size_t, or if an
     * allocation fails. The caller owns the result and releases it with
     * free(b[0]); free(b);
     */
    char** board_initialize(int n,int m)
    {
        int k,l;
        size_t cells;
        char* bd;
        char** b;

        if (n < 0 || m < 0)
            return NULL;
        if (m > 0 && (size_t)n > (size_t)-1 / (size_t)m)
            return NULL;                        /* n * m would overflow size_t */

        /* Ask for at least one element so that NULL always means failure. */
        cells = (size_t)n * (size_t)m;
        bd = malloc(cells > 0 ? cells : 1);
        b = malloc((n > 0 ? (size_t)n : 1) * sizeof *b);
        if (bd == NULL || b == NULL)
        {
            free(bd);
            free(b);
            return NULL;
        }

        b[0] = bd;                              /* keeps the buffer reachable even when n == 0 */
        for (k = 0; k < n; k++)
            b[k] = bd + (size_t)k * (size_t)m;

        for (k = 0; k < n; k++)
            for (l = 0; l < m; l++)
                b[k][l] = rand() < 0.25 * RAND_MAX;

        return b;
    }

    /*
     * Print an n-by-m board to stdout: one line per row, each cell written as
     * a decimal integer with no separator, followed by one blank line.
     *
     * b is an array of n row pointers, each pointing at m cells.
     */
    void board_print(char** b,int n,int m)
    {
        int k,l;

        for (k = 0; k < n; k++)
        {
            for (l = 0; l < m; l++)
                printf("%d",b[k][l]);
            printf("\n");
        }
        printf("\n");
    }


    /*
     * Question version: rank 0 builds an N x N board and tries to hand
     * N / procs columns to every rank with MPI_Scatter.
     *
     * NOTE: the same strided column type (stride N) is used for both the send
     * and the receive side, although the local board is only mycols wide.
     * That mismatch is the defect this question is about and is kept here on
     * purpose; the corrected version follows in the answer.
     */
    int main(int argc,char* argv[])
    {
        int N = 10;

        char * boardptr = NULL;                 // ptr to board
        char ** board = NULL;                   // board, 2D matrix, contiguous memory allocation!

        int procs,myid;
        int mycols;
        char ** myboard;                        // part of board that belongs to a process

        MPI_Init(&argc,&argv);                 // initialization

        MPI_Comm_rank(MPI_COMM_WORLD,&myid);   // process ID
        MPI_Comm_size(MPI_COMM_WORLD,&procs);  // number of processes

        // initialize global board (root only; boardptr stays NULL elsewhere)
        if (myid == 0)
        {
            srand(1573949136);
            board = board_initialize(N,N);
            boardptr = *board;
            board_print(board,N,N);
        }
        // divide work
        mycols = N / procs;


        // initialize my structures
        myboard = board_initialize(N,mycols);

        // one column of the N x N board: N blocks of 1 char, stride N,
        // resized to an extent of one char so columns can be counted
        MPI_Datatype column_not_resized,column_resized;
        MPI_Type_vector(N,1,N,MPI_CHAR,&column_not_resized);
        MPI_Type_commit(&column_not_resized);
        MPI_Type_create_resized(column_not_resized,0,1*sizeof(char),&column_resized);
        MPI_Type_commit(&column_resized);

        // scatter initial matrix (receive type is the send type: see NOTE above)
        MPI_Scatter(boardptr,mycols,column_resized,*myboard,mycols,column_resized,0,MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);

        board_print(myboard,N,mycols);

        MPI_Finalize();         // finalize MPI

        return 0;
    }

整个板子看起来像：

如果我使用 2 个进程，我希望每个进程都会得到一半（第一个进程得到第 1-5 列，第二个进程得到第 6-10 列）。但是如果我打印两个进程的 myboard，我会得到一些奇怪的结果：

proc0:       proc1:
0 0 0 0 0    1 0 0 1 0 
0 0 1 0 0    0 0 0 1 1 
0 1 0 0 0    0 0 0 0 0 
0 1 0 0 0    0 1 0 1 0 
0 0 0 0 1    0 1 1 0 0 
0 1 1 0 0    0 0 1 0 0 
0 1 0 1 0    0 0 0 1 0 
0 0 0 0 1    0 0 1 0 0 
1 0 0 0 0    0 0 1 0 0 
0 1 0 0 0    0 1 0 0 0

这可能是一些愚蠢的错误，但我似乎无法找到它。任何帮助将非常感激。

注意：proc1 的输出可能只是一些垃圾，因为我每次运行都会得到不同的输出。

解决方法

您没有区分发送类型和接收类型：MPI_Type_vector 的参数（尤其是步幅）取决于该类型描述的是发送缓冲区还是接收缓冲区。您需要执行以下操作：

MPI_Datatype acol,acoltype,bcol,bcoltype;
if (myid == 0) {
    MPI_Type_vector(N,1,N,MPI_CHAR,&acol);
    MPI_Type_commit(&acol);
    MPI_Type_create_resized(acol,1*sizeof(char),&acoltype);
}
MPI_Type_vector(N,mycols,&bcol);
MPI_Type_commit(&bcol);
MPI_Type_create_resized(bcol,&bcoltype);
MPI_Type_commit(&bcoltype);
MPI_Scatter (boardptr,*myboard,bcoltype,MPI_COMM_WORLD);

从发送方的角度来看，您需要创建 N 个块（即 N 行），每块大小为 1，步幅为 N（即 N 列）。因此：

MPI_Type_vector(N,&acol);

从接收方的角度来看，您需要创建 N 个块（即 N 行），每块大小为 1，步幅为 mycols（即 mycols 列）。因此：

MPI_Type_vector(N,&bcol);

旁注：在 MPI_Scatter 之后并不需要 MPI_Barrier，因为 MPI_Scatter 本身已经是一个阻塞调用。

最终代码如下所示：

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

    /*
     * Allocate an n-by-m board backed by one contiguous buffer and fill it
     * with random 0/1 cells.
     *
     * All cells live in a single n*m char allocation; the returned array holds
     * n row pointers into that buffer, so b[0] is the start of the whole board
     * and b[k] == b[0] + k*m. Cells are drawn from rand() in row-major order;
     * a cell is 1 when rand() < 0.25 * RAND_MAX and 0 otherwise.
     *
     * Returns NULL if n or m is negative, if n*m overflows size_t, or if an
     * allocation fails. The caller owns the result and releases it with
     * free(b[0]); free(b);
     */
    char** board_initialize(int n,int m)
    {
        int k,l;
        size_t cells;
        char* bd;
        char** b;

        if (n < 0 || m < 0)
            return NULL;
        if (m > 0 && (size_t)n > (size_t)-1 / (size_t)m)
            return NULL;                        /* n * m would overflow size_t */

        /* Ask for at least one element so that NULL always means failure. */
        cells = (size_t)n * (size_t)m;
        bd = malloc(cells > 0 ? cells : 1);
        b = malloc((n > 0 ? (size_t)n : 1) * sizeof *b);
        if (bd == NULL || b == NULL)
        {
            free(bd);
            free(b);
            return NULL;
        }

        b[0] = bd;                              /* keeps the buffer reachable even when n == 0 */
        for (k = 0; k < n; k++)
            b[k] = bd + (size_t)k * (size_t)m;

        for (k = 0; k < n; k++)
            for (l = 0; l < m; l++)
                b[k][l] = rand() < 0.25 * RAND_MAX;

        return b;
    }

    /*
     * Print an n-by-m board to stdout: one line per row, each cell written as
     * a decimal integer with no separator, followed by one blank line.
     *
     * b is an array of n row pointers, each pointing at m cells.
     */
    void board_print(char** b,int n,int m)
    {
        int k,l;

        for (k = 0; k < n; k++)
        {
            for (l = 0; l < m; l++)
                printf("%d",b[k][l]);
            printf("\n");
        }
        printf("\n");
    }


   int main(int argc,char* argv[])
    {   
        int N = 10;
        int i,j;

        char * boardptr = NULL;                 // ptr to board
        char ** board;                          // board,2D matrix,contignous memory allocation!

        int procs,myid;            
        int mycols;
        char ** myboard;                        // part of board that belongs to a process

        MPI_Init(&argc,&argv);                 // initiailzation
        
        MPI_Comm_rank(MPI_COMM_WORLD,&myid);   // process ID
        MPI_Comm_size(MPI_COMM_WORLD,&procs);  // number of processes

        // initialize global board
        if (myid == 0)
        {
            srand(1573949136);
            board = board_initialize(N,N);
            boardptr = *board;
            board_print(board,N);
        }
        // divide work
        mycols = N / procs;


        // initialize my structures
        myboard = board_initialize(N,mycols);
    MPI_Datatype acol,bcoltype;

    if (myid == 0) {
            MPI_Type_vector(N,&acol);       
                MPI_Type_commit(&acol);
            MPI_Type_create_resized(acol,&acoltype);
    }
    MPI_Type_vector(N,&bcol);       

    MPI_Type_commit(&bcol);
    MPI_Type_create_resized(bcol,&bcoltype);
    MPI_Type_commit(&bcoltype);
    

    MPI_Scatter (boardptr,MPI_COMM_WORLD);
        board_print(myboard,mycols);
        
        MPI_Finalize();         // finalize MPI

        return 0;
    }

或者，在我看来更简单的做法是按行而不是按列划分，代码如下所示：

int myrows = N / procs;
myboard = board_initialize(myrows,N);
MPI_Scatter (boardptr,myrows,MPI_COMM_WORLD);
board_print(myboard,N);

c mpi parallel-processing performance scatter-matrix