问题描述
我正在使用 Dev C++ 编译器来运行下面的 C 代码。我使用了 _UADD 汇编函数使其更加优化。但是我收到 _UADD 的未定义参考错误。我只有很少的汇编语言背景。我可以使用这个汇编函数 _UADD 来优化我的 C 代码并在 Dev C++ 中编译吗?如果是这样怎么办?
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <x86intrin.h>
#include <stdint.h>
#ifndef N1_8_H
#define N1_8_H
void refined_karatsuba_mul(int16_t* h,const int16_t* f,const int16_t* g,int n);
#endif
static void schoolbook_mul(int16_t* h,int n)
{
int16_t result;
int i,j;
for (i = 0; i < n; ++i) {
result = 0;
for (j = 0; j <= i; ++j) result = result+f[j]*g[i-j];
h[i] = result;
}
for (i = n; i < n+n-1; ++i) {
result = 0;
for (j = i-n+1; j < n; ++j) result = result+f[j]*g[i-j];
h[i] = result;
}
}
void refined_karatsuba_mul(int16_t* h,int n) {
const int16_t* f0 = f;
int16_t f01[n/2];
const int16_t* f1 = f + n/2;
const int16_t* g0 = g;
int16_t g01[n/2];
const int16_t* g1 = g + n/2;
uint32_t result;
for (int i = 0; i < n/2; i+=2) {
*(int32_t*)(&f01[i]) = __UADD16(*(int32_t*)(&f0[i]),*(int32_t*)(&f1[i])); // f01[i]= f0[i] + f1[i] and f01[i+1]= f0[i+1] + f1[i+1]
*(int32_t*)(&g01[i]) = __UADD16(*(int32_t*)(&g0[i]),*(int32_t*)(&g1[i])); // g01[i]= g0[i] + g1[i] and g01[i+1]= g0[i+1] + g1[i+1]
}
int16_t p1[n-1],p2[n-1],p3[n-1];
schoolbook_mul(p1,f0,g0,n/2);
schoolbook_mul(p2,f01,g01,n/2);
schoolbook_mul(p3,f1,g1,n/2);
for(int i = 0; i < n-1; i++) {
h[i] += p1[i];
h[i+n/2] += p2[i] - p1[i] - p3[i];
h[i+n] += p3[i];
}
}
int main(){
int16_t *f = malloc(8*sizeof(int16_t));
int16_t *g = malloc(8*sizeof(int16_t));
int16_t *h= calloc((2*8-1),sizeof(int16_t));
int i;
////////////////////////reading f and g
FILE *myFile;
myFile = fopen("inp8test","r");
if (myFile == NULL){
printf("Error Reading File\n");
exit (0);}
for (i = 0; i < 8; i++){
fscanf(myFile,"%d",&f[i]);
f[i] = 0;
}
for (i = 0; i < 8; i++){
fscanf(myFile,&g[i] );
g[i] = 0;
}
fclose(myFile);
unsigned long long LastCycleCount = _rdtsc();
clock_t start = clock ();
for (i=0;i<999;i++){
refined_karatsuba_mul(h,f,g,8);
memset(h,sizeof(int16_t)*(2*8-1));
}
refined_karatsuba_mul(h,8);
unsigned long long EndCycleCount = _rdtsc();
unsigned long long CyclesElapsed = EndCycleCount - LastCycleCount;
CyclesElapsed = CyclesElapsed/1000 ;
double Timelapsed=(clock()-start)/(double) CLOCKS_PER_SEC;
Timelapsed=Timelapsed/1000;
/////////////////////////////////////////////write on c_n1_8 file
FILE *outFile;
outFile = fopen("c_n1_8","w");
if (outFile == NULL)
{
printf("Cannot Open File\n");
exit (0);
}
for (i = 0; i < 2*8-1; i++)
fprintf(outFile,"%d ",((h[i])%3+3)%3 );
fclose(outFile);
/////////////////////////////////////////////////////////////printing on console
for( i=0; i<2*8-1; i++)
printf("%d ",((h[i])%3+3)%3);
printf("\n\n\n");
printf("Cycles: %llu\n",CyclesElapsed);
printf("Multiplication Time:");
printf( "%f",Timelapsed);
printf("sec");
printf("\n \n ");
free(f);
free(g);
free(h);
return 0;
}
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)