Fortran流访问效率与MPI-IO的对比

问题描述

我有一段代码的并行部分,其中我在多个块中写出n个大数组(表示一个数值网格),然后再以不同大小的块读取它们。为此,我使用了流访问(stream access),因此每个处理器都独立地写入自己的块。但在本节使用2个处理器组的测试中,我观察到写入耗时很不稳定,从0.5秒到4秒不等。

我知道您可以使用MPI-IO做类似的事情,但是我不确定会有什么好处,因为没有必要进行同步。我想知道是否有一种方法可以提高写入性能,或者是否有理由将MPI-IO作为本节的更好选择。

这是代码部分的示例,在该部分中,我创建文件以使用两个组(mygroup = 0或1)来写数组:

! Loop over orbitals: each MPI process independently writes its own block
! of POT and RHOG into shared files via Fortran stream access, computing
! its byte offset (mypos) by hand.  No synchronization between processes.
do irbsic=1,norb
  [various operations]

  ! Number of elements per mesh group (nmsh_tot split across ngroups)
  blocksize=int(nmsh_tot/ngroups)
  OPEN(unit=iunit,FILE='ZPOT',STATUS='UNKNOWN',ACCESS='STREAM')
  mypos = 1 + (irbsic-1)*nmsh_tot*8     ! starting byte for this irbsic (8 bytes per element)
  mypos = mypos + mygroup*(8*blocksize) ! starting byte for this mesh group
  WRITE(iunit,POS=mypos) POT(1:nmsh)
  CLOSE(iunit)

  ! Same pattern for RHOI.  The original listing fused the OPEN and the
  ! WRITE into a single (invalid) statement; they are split here.
  OPEN(unit=iunit,FILE='RHOI',STATUS='UNKNOWN',ACCESS='STREAM')
  WRITE(iunit,POS=mypos) RHOG(1:nmsh,1,1)
  CLOSE(iunit)

  [various operations]
end do

解决方法

(如评论中所述),我强烈建议不要为此使用Fortran流访问。只有在单个进程访问文件的情况下,标准Fortran I/O才能保证正常工作。在我自己的工作中,我曾见过当多个进程同时写入同一文件(即使写入的是文件的不同部分)时,文件出现随机损坏。MPI-I/O,或者使用MPI-I/O的库(例如HDF5或NetCDF),是实现此目的的唯一明智方法。下面是一个简单的程序,说明了mpi_file_write_at_all的用法:
ian@eris:~/work/stack$ cat at.f90
Program write_at

  ! Demonstrates collective MPI-I/O: each rank writes its own contiguous
  ! chunk of default reals into a shared file with mpi_file_write_at_all,
  ! then rank 0 reads the whole file back with plain Fortran stream I/O
  ! to verify the contents.

  Use mpi

  Implicit None

  Integer,Parameter :: n = 4                    ! reals written per rank

  Real,Dimension( 1:n ) :: a                    ! this rank's chunk

  Real,Dimension( : ),Allocatable :: all_of_a   ! rank 0's read-back buffer

  Integer :: me,nproc
  Integer :: handle                             ! MPI file handle
  Integer :: i
  Integer :: error

  ! Set up MPI
  Call mpi_init( error )
  Call mpi_comm_size( mpi_comm_world,nproc,error )
  Call mpi_comm_rank( mpi_comm_world,me,error )

  ! Provide some data: rank r holds the values r*n .. r*n + n - 1
  a = [ ( i,i = n * me,n * ( me + 1 ) - 1 ) ]

  ! Open the file collectively for writing, creating it if necessary
  Call mpi_file_open( mpi_comm_world,'stuff.dat',&
       mpi_mode_create + mpi_mode_wronly,mpi_info_null,handle,error )

  ! Describe how the processes will view the file - in this case
  ! simply a stream of mpi_real (so offsets are counted in reals)
  Call mpi_file_set_view( handle,0_mpi_offset_kind,&
       mpi_real,mpi_real,'native',&
       mpi_info_null,error )

  ! Write the data using a collective routine - generally the most efficient,
  ! but as it is collective all processes within the communicator must call it.
  ! Note the mpi_real datatype argument: the listing as posted dropped it,
  ! which does not match the MPI_File_write_at_all interface
  ! (fh, offset, buf, count, datatype, status, ierror).
  Call mpi_file_write_at_all( handle,Int( me * n,mpi_offset_kind ),&
       a,Size( a ),mpi_real,mpi_status_ignore,error )

  ! Close the file (collective; completes the writes)
  Call mpi_file_close( handle,error )

  ! Read the file on rank zero using Fortran to check the data
  If( me == 0 ) Then
     Open( 10,file = 'stuff.dat',access = 'stream' )
     Allocate( all_of_a( 1:n * nproc ) )
     Read( 10,pos = 1 ) all_of_a
     Write( *,* ) all_of_a
  End If

  ! Shut down MPI
  Call mpi_finalize( error )

End Program write_at
ian@eris:~/work/stack$ mpif90 --version
GNU Fortran (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation,Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ian@eris:~/work/stack$ mpif90 -Wall -Wextra -fcheck=all -std=f2008 at.f90 
ian@eris:~/work/stack$ mpirun -np 2 ./a.out 
   0.00000000       1.00000000       2.00000000       3.00000000       4.00000000       5.00000000       6.00000000       7.00000000    
ian@eris:~/work/stack$ mpirun -np 5 ./a.out 
   0.00000000       1.00000000       2.00000000       3.00000000       4.00000000       5.00000000       6.00000000       7.00000000       8.00000000       9.00000000       10.0000000       11.0000000       12.0000000       13.0000000       14.0000000       15.0000000       16.0000000       17.0000000       18.0000000       19.0000000    
ian@eris:~/work/stack$