问题描述
我最近刚开始接触 Fortran 和 Python 的 ctypes,因为我计划把一个使用 Intel MKL 的 Fortran 库公开给 Python 调用;目前这个库由一个 C 库调用,而该 C 库又被一个 C++ 库调用……最终由 C# 使用。
一开始,我编写了一段简单的 Fortran 代码,定义了几个矩阵乘法函数:一个是手写循环的朴素实现,一个使用 Fortran 内置的 matmul,一个使用 Intel MKL 的 dgemm:
!> Compute M3 = M1 * M2 with the Fortran intrinsic matmul.
!!
!! C-interoperable entry point, exported from the DLL under the
!! unmangled name 'matmultorig'.  No dummy argument has the VALUE
!! attribute, so the caller must pass every argument by reference.
!!
!! M1 : (M,N) left operand,  read only
!! M2 : (N,K) right operand, read only
!! M3 : (M,K) result, completely overwritten
!! M, N, K : matrix extents
!!
!! Statements start in column 7 and stay within 72 columns so the
!! unit is valid in both fixed-form and free-form source.
      subroutine matmultorig(M1,M2,M3,M,N,K) bind(c,name='matmultorig')
!DEC$ ATTRIBUTES DLLEXPORT :: matmultorig
      use, intrinsic :: iso_c_binding, only: c_double, c_int
      implicit none
! N is declared together with M and K: it sizes M1 and M2, and
! leaving it out would make it depend on implicit typing.
      integer(c_int),intent(in) :: M,N,K
      real(c_double),intent(in) :: M1(M,N),M2(N,K)
      real(c_double),intent(inout) :: M3(M,K)
      M3 = matmul(M1,M2)
      end subroutine matmultorig
!> Compute M3 = M1 * M2 with hand-written loops (baseline version).
!!
!! C-interoperable entry point, exported from the DLL under the
!! unmangled name 'matmultmy'.  The argument list mirrors
!! matmultorig and matmultmkl; every argument is passed by
!! reference.
!!
!! M1 : (M,N) left operand,  read only
!! M2 : (N,K) right operand, read only
!! M3 : (M,K) result, completely overwritten
!! M, N, K : matrix extents
!!
!! Statements start in column 7 and stay within 72 columns so the
!! unit is valid in both fixed-form and free-form source.
      subroutine matmultmy(M1,M2,M3,M,N,K) bind(c,name='matmultmy')
!DEC$ ATTRIBUTES DLLEXPORT :: matmultmy
      use, intrinsic :: iso_c_binding, only: c_double, c_int
      implicit none
      integer(c_int),intent(in) :: M,N,K
      real(c_double),intent(in) :: M1(M,N),M2(N,K)
      real(c_double),intent(inout) :: M3(M,K)
      integer(c_int) :: i,j,l
! Fortran arrays are column-major, so the innermost loops run over
! the first index i.  Each M3(i,j) still accumulates its terms in
! increasing l, the same summation order as an (i,j,l) nest.
      do j = 1,K
        do i = 1,M
          M3(i,j) = 0.0_c_double
        end do
        do l = 1,N
          do i = 1,M
            M3(i,j) = M3(i,j) + M1(i,l)*M2(l,j)
          end do
        end do
      end do
      end subroutine matmultmy
!> Compute M3 = M1 * M2 by delegating to the BLAS routine DGEMM.
!!
!! C-interoperable entry point, exported from the DLL under the
!! unmangled name 'matmultmkl'.  The argument list mirrors
!! matmultorig and matmultmy; every argument is passed by
!! reference.
!!
!! M1 : (M,N) left operand,  read only
!! M2 : (N,K) right operand, read only
!! M3 : (M,K) result, completely overwritten
!! M, N, K : matrix extents
!!
!! Statements start in column 7 and stay within 72 columns so the
!! unit is valid in both fixed-form and free-form source.
      subroutine matmultmkl(M1,M2,M3,M,N,K) bind(c,name='matmultmkl')
!DEC$ ATTRIBUTES DLLEXPORT :: matmultmkl
      use, intrinsic :: iso_c_binding, only: c_double, c_int
      implicit none
      integer(c_int),intent(in) :: M,N,K
      real(c_double),intent(in) :: M1(M,N),M2(N,K)
      real(c_double),intent(inout) :: M3(M,K)
! DGEMM is reached through an implicit interface, so the compiler
! cannot convert argument kinds.  alpha and beta must already be
! double precision; default-real literals such as 1. and 0. would
! be misread by the library.
      real(c_double), parameter :: one = 1.0_c_double
      real(c_double), parameter :: zero = 0.0_c_double
      external :: dgemm
! DGEMM(transa,transb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc) forms
! C(m,n) = alpha*A(m,k)*B(k,n) + beta*C.  In this routine's naming
! the result is M-by-K and the contracted extent is N.  The leading
! dimensions are the row counts of the untransposed arrays.
! NOTE(review): the integer arguments are c_int, which matches the
! LP64 MKL interface linked by the build script; confirm if the
! ILP64 interface is ever used instead.
      call dgemm('N','N',M,K,N,one,M1,M,M2,N,zero,M3,M)
      end subroutine matmultmkl
我使用一个 .bat 文件来编译 Fortran 代码(我在 Windows 下):
@Echo off
REM Build mylib.dll from test.f with Intel Fortran and link it against
REM the threaded LP64 Intel MKL libraries.
REM
REM setlocal keeps the variables below private to this script. Delayed
REM expansion is not enabled because nothing here uses !VAR! syntax.
setlocal

REM Single place to change when MKL is installed somewhere else.
SET "MKL_ROOT=C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.4.311\windows\mkl"
SET "IFORT_INITIAL_FLAGS=-c -fpp"
SET "IFORT_OPTIMIZATION_FLAGS=/O3"
SET "MKL_LIBS=mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib"

REM Step 1: compile only (-c) to an object file.
ifort %IFORT_OPTIMIZATION_FLAGS% %IFORT_INITIAL_FLAGS% /I"%MKL_ROOT%\include" -o test.obj test.f
REM Stop here on a compile error so a stale test.obj is never linked.
if errorlevel 1 exit /b %errorlevel%

REM Step 2: link the object into a DLL together with MKL and the OpenMP runtime.
ifort -dll -o mylib.dll test.obj /link /LIBPATH:"%MKL_ROOT%\lib\intel64_win" %MKL_LIBS%
if errorlevel 1 exit /b %errorlevel%
最后,我编写了以下 python 脚本,我正在从 Visual Studio Code 执行:
from ctypes import *
import time
import os
os.add_dll_directory(r"C:/Program Files (x86)/IntelSWTools/compilers_and_libraries_2020.4.311/windows/redist/intel64_win/mkl")
os.add_dll_directory(r"C:/Program Files (x86)/IntelSWTools/compilers_and_libraries_2020.4.311/windows/redist/intel64_win/compiler")
import numpy as np
# Benchmark three Fortran matrix-product routines exported by mylib.dll
# against numpy's own matrix product.
mylib = CDLL(r"C:/path/to/the/fortran/mylib.dll")

# The three exported routines share one signature, (M1, M2, M3, M, N, K):
# three double arrays followed by three integers. The Fortran side declares
# no VALUE attribute, so every argument, scalars included, is a pointer.
_MATMUL_ARGTYPES = [
    POINTER(c_double), POINTER(c_double), POINTER(c_double),
    POINTER(c_int), POINTER(c_int), POINTER(c_int),
]
for _routine in (mylib.matmultmy, mylib.matmultorig, mylib.matmultmkl):
    _routine.argtypes = _MATMUL_ARGTYPES
    _routine.restype = None  # Fortran subroutines return nothing

# Setup
M = 500
N = 500
K = 500
# order="F" stores the arrays column-major, which is how the Fortran
# routines index them. numpy's default C order would make Fortran see each
# matrix transposed.
a = np.empty((M, N), dtype=c_double, order="F")
b = np.empty((N, K), dtype=c_double, order="F")
c = np.empty((M, K), dtype=c_double, order="F")
a[:] = np.random.rand(M, N)
b[:] = np.random.rand(N, K)

# Reference product used to sanity-check each Fortran routine. Computing it
# before any timing also serves as numpy's own warm-up call.
reference = a.dot(b)

# Keep the scalar arguments alive in named objects for the whole run.
m_arg = c_int(M)
n_arg = c_int(N)
k_arg = c_int(K)


def time_fortran(label, routine):
    """Time one call of `routine` on (a, b, c) and print `label` with the result.

    The routine is called once untimed first, so that one-time costs such as
    DLL loading or thread start-up in a threaded library are not included in
    the measurement.
    """
    call_args = (
        a.ctypes.data_as(POINTER(c_double)),
        b.ctypes.data_as(POINTER(c_double)),
        c.ctypes.data_as(POINTER(c_double)),
        byref(m_arg),
        byref(n_arg),
        byref(k_arg),
    )
    routine(*call_args)  # warm-up, not timed
    # perf_counter is monotonic and has far better resolution than
    # time.time(), which matters for millisecond-scale measurements.
    start = time.perf_counter()
    routine(*call_args)
    stop = time.perf_counter()
    print(f"{label} {stop - start}s")
    if not np.allclose(c, reference):
        print(f"WARNING: {label.strip()} result differs from numpy reference")


# Fortran my call
time_fortran("Fortran my \t", mylib.matmultmy)
# Fortran matmul call
time_fortran("Fortran matmul \t", mylib.matmultorig)
# Fortran mkl call
time_fortran("Fortran mkl \t", mylib.matmultmkl)
# Numpy
start = time.perf_counter()
c = a.dot(b)
stop = time.perf_counter()
print(f"Numpy \t\t {stop - start}s")
结果是:
Fortran my 0.11234903335571289s
Fortran matmul 0.023325443267822266s
Fortran mkl 0.5279343128204346s
Numpy 0.001001596450805664s
我也尝试过同样的思路,不过是在 Python / C++ pybind11 的环境下(矩阵大小相同):
pybind11 vs numpy for a matrix product
那里的数字与我的情况没有直接可比性,但至少 numpy 和 Intel MKL 的表现大致相当。而在这里,调用 dgemm 的函数比 numpy 的矩阵乘积慢了约 500 倍。我怀疑次要原因是参数编组(marshalling),主要原因是“C 绑定”。不过,这些东西我两天前才刚开始接触,所以如果有行家有想法……
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)