问题描述
以下是源代码:
/// <summary>
/// Runs the Data Migration Assistant SKU-recommendation collection script in a
/// dedicated PowerShell runspace and reports its output.
/// </summary>
/// <remarks>
/// Each result object is written to the debug output and shown in a message box.
/// Entries in the PowerShell error stream are written to the debug output.
/// Any exception thrown while setting up or running the pipeline is shown in a
/// message box rather than propagated to the caller.
/// </remarks>
public static void RunPowershellCommand()
{
    try
    {
        using (var runspace = RunspaceFactory.CreateRunspace())
        {
            // C# is case-sensitive: the method is Open(), not open().
            runspace.Open();
            using (var ps = System.Management.Automation.PowerShell.Create())
            {
                ps.Runspace = runspace;

                Command queryWmi = new Command(@"C:\Program Files\Microsoft Data Migration Assistant\SkuRecommendationDataCollectionScript.ps1");
                queryWmi.Parameters.Add("ComputerName", CompName);
                queryWmi.Parameters.Add("OutputFilePath", OutPutPath);
                queryWmi.Parameters.Add("CollectionTimeInSeconds", time.ToString());
                queryWmi.Parameters.Add("DbConnectionString", DBString);

                // Relax the execution policy first, as a separate invocation.
                ps.AddScript("Set-ExecutionPolicy RemoteSigned");
                ps.Invoke();

                // Invoke() does not reset the command list. Without this Clear() the
                // collection script would be appended to the same pipeline as the
                // Set-ExecutionPolicy statement instead of running on its own.
                ps.Commands.Clear();

                ps.Commands.AddCommand(queryWmi);
                var commandResults = ps.Invoke();

                Debug.WriteLine($"PS command result {commandResults.Count} lines");
                foreach (var commandResult in commandResults)
                {
                    Debug.WriteLine(" Command Result : " + commandResult);
                    MessageBox.Show(commandResult.ToString());
                }

                // The error stream collects non-terminating errors from the invocations above.
                if (ps.Streams.Error.Count != 0)
                {
                    Debug.WriteLine("Errors:");
                    foreach (var error in ps.Streams.Error)
                    {
                        Debug.WriteLine("Command Error : " + error.ToString());
                    }
                }

                Console.WriteLine("Finished");
                // Blocks until a line is read from standard input.
                Console.ReadLine();
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
以下是相关的 C++ 源码,以及 VC++ 2017 为调用该函数的完整循环生成的汇编代码:
// 4-wide counterpart of multiplyComplex: multiplies the 4 complex numbers in x by the 4 in y
__forceinline __m256 multiplyComplex_x4( const __m256 x,const __m256 y )
{
	// With x = [ a,b ] and y = [ c,d ] per complex number, the product is [ ac - bd,ad + bc ]
	const __m256 realDup = _mm256_moveldup_ps( x ); // [ a,a ]
	const __m256 imagDup = _mm256_movehdup_ps( x ); // [ b,b ]
	const __m256 ySwapped = _mm256_permute_ps( y,shuffleMask_rev64q ); // [ d,c ]
	const __m256 imagTerms = _mm256_mul_ps( imagDup,ySwapped ); // [ bd,bc ]
	// fmaddsub subtracts in even lanes and adds in odd lanes, giving [ ac - bd,ad + bc ]
	return _mm256_fmaddsub_ps( realDup,y,imagTerms );
}
// 4-wide counterpart of fftMainLoop: processes 4 complex numbers at a1c and 4 at a2c in place.
// Writes a1 + omega * a2 back to a1c and a1 - omega * a2 back to a2c.
__forceinline void fftMainLoop_x4( const __m256 omega,complex* a1c,complex* a2c )
{
	float* const p1 = (float*)a1c;
	float* const p2 = (float*)a2c;
	// Both inputs are read (unaligned loads) before either output is stored
	const __m256 first = _mm256_loadu_ps( p1 );
	const __m256 second = _mm256_loadu_ps( p2 );
	const __m256 scaled = multiplyComplex_x4( omega,second );
	const __m256 sum = _mm256_add_ps( first,scaled );
	const __m256 diff = _mm256_sub_ps( first,scaled );
	_mm256_storeu_ps( p1,sum );
	_mm256_storeu_ps( p2,diff );
}
代码从内存中读取的频率比我希望的要高50%。
请注意下面汇编循环中的两条指令:
00007FF7CC3F11B0 lea eax,[r8+4]
00007FF7CC3F11B4 mov ecx,r8d
00007FF7CC3F11B7 vpermilps ymm0,ymmword ptr [rdi+rax*8],0B1h
00007FF7CC3F11BE vmulps ymm2,ymm0,ymm4
00007FF7CC3F11C2 vfmaddsub231ps ymm2,ymm5,ymmword ptr [rdi+rax*8]
00007FF7CC3F11C8 vmovups ymm3,ymmword ptr [rdi+rcx*8]
00007FF7CC3F11CD vaddps ymm0,ymm2,ymm3
00007FF7CC3F11D1 add r8d,8
00007FF7CC3F11D5 vsubps ymm1,ymm3,ymm2
00007FF7CC3F11D9 vmovups ymmword ptr [rdi+rcx*8],ymm0
00007FF7CC3F11DE vmovups ymmword ptr [rdi+rax*8],ymm1
00007FF7CC3F11E3 cmp r8d,esi
00007FF7CC3F11E6 jb fft_run+1B0h (07FF7CC3F11B0h)
其中,
vpermilps ymm0,ymmword ptr [rdi+rax*8],0B1h
和
vfmaddsub231ps ymm2,ymm5,ymmword ptr [rdi+rax*8]
两者都访问相同的内存地址。
是否有办法说服编译器为我的 _mm256_loadu_ps 发出一条 vmovups,
而不是把加载合并进这两条指令的内存操作数、从而重复读取同一地址?
解决方法
暂未找到可以解决该问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)