使用 MAGMA 测试代码时,通过 CMakeLists 编译出现链接错误:未定义的引用 'magma_opts::parse_opts'

问题描述

目前我正在使用 MAGMA 2.5.4 的批量(batched)线性求解器来求解大量小型矩阵的线性方程组。我想在通过 CMakeLists 文件编译的项目中使用 magma_dgesv_batched。

我的 include_directories 和 target_link_libraries 如下所示。

# Header search paths: the installed MAGMA headers, plus the include/ and
# testing/ directories of the MAGMA 2.5.4 source tree.
# NOTE(review): adding testing/ here only makes its headers visible; it does
# not compile or link any sources that live in that directory.
include_directories( "/usr/local/magma/include" )
include_directories( "/home/research/magma-2.5.4/magma-2.5.4/include" )
include_directories( "/home/research/magma-2.5.4/magma-2.5.4/testing" )
# Libraries linked into the `minus` target: MAGMA (sparse + dense), the CUDA
# runtime with cuBLAS/cuSPARSE, OpenBLAS and pthread. Each -L flag adds a
# library search directory for the names that follow it.
# NOTE(review): the OpenBLAS -L path points at a cmake/ config directory --
# confirm that libopenblas is actually found there.
target_link_libraries(minus
-L/usr/local/magma/lib magma_sparse magma
-L/usr/lib/cuda/lib64 cublas cudart cusparse
-L/usr/lib/x86_64-linux-gnu/openblas-pthread/cmake/openblas openblas
pthread
)

但是,我遇到了一些链接错误

tmpxft_00003500_00000000-5_minus_cuda.cudafe1.cpp:(.text+0x506): undefined reference to `magma_opts::magma_opts(magma_opts_t)'
/usr/bin/ld: tmpxft_00003500_00000000-5_minus_cuda.cudafe1.cpp:(.text+0x514): undefined reference to `magma_opts::parse_opts(int,char**)'
/usr/bin/ld: tmpxft_00003500_00000000-5_minus_cuda.cudafe1.cpp:(.text+0xfce): undefined reference to `magma_opts::cleanup()'
collect2: error: ld returned 1 exit status
make[2]: *** [cmd/CMakeFiles/minus-simpleEx.dir/build.make:105: bin/minus-simpleEx] Error 1
make[1]: *** [CMakeFiles/Makefile2:926: cmd/CMakeFiles/minus-simpleEx.dir/all] Error 2
make: *** [Makefile:95: all] Error 2

显然,它表明我没有链接正确的库,但我不知道应该如何在我的 CMakeLists 文件中修复它。我查看了 MAGMA 文档,似乎没有需要链接的其他库(也许我做错了什么)。

MAGMA 安装成功,我也完美地运行了它的测试代码 magma_dgesv_batched。 Gcc 版本是 8,在 Ubuntu 20.04 中使用 cuda 10。

谢谢!

解决方法

好的,我在询问 MAGMA 的一位开发人员后解决了这个问题。问题在于,magma_opts::magma_opts(magma_opts_t) 并不包含在标准 MAGMA 库中,而是定义在 MAGMA 的 testing 文件夹里。因此我不应该原样复制 MAGMA 的测试代码并尝试运行它,而是应该模仿它的结构。为了替代测试代码中的 opts.queue,我需要通过 magma_queue_create 创建一个 magma queue,并在使用完毕后通过 magma_queue_destroy 将其销毁。

以下是完美运行的完整代码:

#include <stdio.h>
#include <stdlib.h>

#include <cmath>

// magma
#include "flops.h"
#include "magma_v2.h"
#include "magma_lapack.h"

int main() {
  magma_init();
  magma_print_environment();

  real_Double_t   gflops,cpu_perf,cpu_time,gpu_perf,gpu_time;
  float          error,Rnorm,Anorm,Xnorm,*work;
  magmaFloatComplex c_one     = MAGMA_C_ONE;
  magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
  magmaFloatComplex *h_A,*h_B,*h_X;
  magmaFloatComplex_ptr d_A,d_B;
  magma_int_t *dipiv,*dinfo_array;
  magma_int_t *ipiv,*cpu_info;
  magma_int_t N,nrhs,lda,ldb,ldda,lddb,info,sizeA,sizeB;
  magma_int_t ione = 1;
  magma_int_t ISEED[4] = {0,1};
  int status = 0;
  magma_int_t batchCount = 2;
  nrhs = 1;

  magmaFloatComplex **dA_array = NULL;
  magmaFloatComplex **dB_array = NULL;
  magma_int_t     **dipiv_array = NULL;

  bool use_lapack = 1;
  double tol = 0.000001;
  N = 6;

  magma_queue_t my_queue;    // magma queue variable,internally holds a cuda stream and a cublas handle
  magma_device_t cdev;       // variable to indicate current gpu id

  magma_getdevice( &cdev );
  magma_queue_create( cdev,&my_queue );     // create a queue on this cdev

  printf("%% BatchCount   N  NRHS   CPU Gflop/s (msec)   GPU Gflop/s (msec)   ||B - AX|| / N*||A||*||X||\n");
  printf("%%============================================================================================\n");
  lda    = N;
  ldb    = lda;
  ldda   = magma_roundup( N,32 );  // multiple of 32 by default
  lddb   = ldda;
  gflops = ( FLOPS_DGETRF( N,N ) + FLOPS_DGETRS( N,nrhs ) ) * batchCount / 1e9;

  sizeA = lda*N*batchCount;
  sizeB = ldb*nrhs*batchCount;

  magma_cmalloc_cpu( &h_A,sizeA );
  magma_cmalloc_cpu( &h_B,sizeB );
  magma_cmalloc_cpu( &h_X,sizeB );
  magma_smalloc_cpu( &work,N );
  magma_imalloc_cpu( &ipiv,batchCount*N );
  magma_imalloc_cpu( &cpu_info,batchCount );

  magma_cmalloc( &d_A,ldda*N*batchCount    );
  magma_cmalloc( &d_B,lddb*nrhs*batchCount );
  magma_imalloc( &dipiv,N * batchCount );
  magma_imalloc( &dinfo_array,batchCount );

  magma_malloc( (void**) &dA_array,batchCount * sizeof(magmaFloatComplex*) );
  magma_malloc( (void**) &dB_array,batchCount * sizeof(magmaFloatComplex*) );
  magma_malloc( (void**) &dipiv_array,batchCount * sizeof(magma_int_t*) );

  /* Initialize the matrices */
  lapackf77_clarnv( &ione,ISEED,&sizeA,h_A );
  lapackf77_clarnv( &ione,&sizeB,h_B );

  magma_csetmatrix( N,N*batchCount,h_A,d_A,my_queue );
  magma_csetmatrix( N,nrhs*batchCount,h_B,d_B,my_queue );

  /* ====================================================================
     Performs operation using MAGMA
     =================================================================== */
  magma_cset_pointer( dA_array,ldda*N,batchCount,my_queue );
  magma_cset_pointer( dB_array,lddb*nrhs,my_queue );
  magma_iset_pointer( dipiv_array,dipiv,1,N,my_queue );

  gpu_time = magma_sync_wtime( my_queue );
  info = magma_cgesv_batched(N,dA_array,dipiv_array,dB_array,dinfo_array,my_queue);
  gpu_time = magma_sync_wtime( my_queue ) - gpu_time;
  gpu_perf = gflops / gpu_time;

  // check correctness of results throught "dinfo_magma" and correctness of argument throught "info"
  magma_getvector( batchCount,sizeof(magma_int_t),cpu_info,my_queue );
  for (int i=0; i < batchCount; i++)
  {
      if (cpu_info[i] != 0 ) {
          printf("magma_dgesv_batched matrix %lld returned internal error %lld\n",(long long) i,(long long) cpu_info[i] );
      }
  }
  if (info != 0) {
      printf("magma_dgesv_batched returned argument error %lld: %s.\n",(long long) info,magma_strerror( info ));
  }

  //=====================================================================
  // Residual
  //=====================================================================
  magma_cgetmatrix( N,h_X,my_queue );

  error = 0;
  for (magma_int_t s=0; s < batchCount; s++)
  {
      Anorm = lapackf77_clange("I",&N,h_A + s * lda * N,&lda,work);
      Xnorm = lapackf77_clange("I",&nrhs,h_X + s * ldb * nrhs,&ldb,work);

      blasf77_cgemm( MagmaNoTransStr,MagmaNoTransStr,&c_one,&c_neg_one,h_B + s * ldb * nrhs,&ldb);

      Rnorm = lapackf77_clange("I",work);
      float err = Rnorm/(N*Anorm*Xnorm);

      if (std::isnan(err) || std::isinf(err)) {
          error = err;
          break;
      }
      error = max( err,error );
  }
  bool okay = (error < tol);
  status += ! okay;

  /* ====================================================================
     Performs operation using LAPACK
     =================================================================== */
  if ( use_lapack ) {
      cpu_time = magma_wtime();
      // #define BATCHED_DISABLE_PARCPU
      #if !defined (BATCHED_DISABLE_PARCPU) && defined(_OPENMP)
      magma_int_t nthreads = magma_get_lapack_numthreads();
      magma_set_lapack_numthreads(1);
      magma_set_omp_numthreads(nthreads);
      #pragma omp parallel for schedule(dynamic)
      #endif
      for (magma_int_t s=0; s < batchCount; s++)
      {
          magma_int_t locinfo;
          lapackf77_cgesv( &N,ipiv + s * N,&locinfo );
          if (locinfo != 0) {
              printf("lapackf77_cgesv matrix %lld returned error %lld: %s.\n",(long long) s,(long long) locinfo,magma_strerror( locinfo ));
          }
      }
      #if !defined (BATCHED_DISABLE_PARCPU) && defined(_OPENMP)
          magma_set_lapack_numthreads(nthreads);
      #endif
      cpu_time = magma_wtime() - cpu_time;
      cpu_perf = gflops / cpu_time;
      printf( "%10lld %5lld %5lld   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",(long long) batchCount,(long long) N,(long long) nrhs,cpu_time*1000,gpu_time*1000,error,(okay ? "ok" : "failed"));
  }
  else {
      printf( "%10lld %5lld %5lld     ---   (  ---  )   %7.2f (%7.2f)   %8.2e   %s\n",gpu_time,(okay ? "ok" : "failed"));
  }

  magma_queue_destroy( my_queue );

  magma_free_cpu( h_A );
  magma_free_cpu( h_B );
  magma_free_cpu( h_X );
  magma_free_cpu( work );
  magma_free_cpu( ipiv );
  magma_free_cpu( cpu_info );

  magma_free( d_A );
  magma_free( d_B );

  magma_free( dipiv );
  magma_free( dinfo_array );

  magma_free( dA_array );
  magma_free( dB_array );
  magma_free( dipiv_array );

  fflush( stdout );

  printf( "\n" );

  magma_finalize();
}