(中)如果速度比精確度重要,可使用 fast math library(例如以 `__sinf` 取代 `sinf`)。以下範例展示兩者在精確度上的差異:
#include<stdio.h>
#include<math.h>
#define SIZE 100000
/*
 * Evaluates sin(0.5) on the device in two ways so the host can compare them:
 *   out[0] = sinf(0.5f)    -- standard single-precision math function
 *   out[1] = __sinf(0.5f)  -- fast-math intrinsic (faster, lower accuracy)
 *
 * Preconditions: `out` must point to device memory holding at least two
 * floats. Any launch configuration is accepted; only the thread with
 * blockIdx.x == 0 and threadIdx.x == 0 stores the results, so extra threads
 * do not issue redundant writes to the same addresses.
 *
 * NOTE(review): the argument is a compile-time constant, so the compiler may
 * be able to evaluate the accurate call at build time -- confirm against the
 * generated PTX if a genuine run-time evaluation is required.
 */
__global__ static void sineTest(float *out){
    /* A single writer is enough: both results are thread-independent. */
    if (blockIdx.x != 0 || threadIdx.x != 0)
        return;

    /* Float literals keep the arguments in single precision. */
    out[0] = sinf(0.5f);
    out[1] = __sinf(0.5f);
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * err  -- status returned by the CUDA call
 * what -- short label identifying the call in the message
 *
 * Returns 1 when `err` is cudaSuccess, 0 otherwise.
 */
static int cudaSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 0;
}

/*
 * Precision demo: runs sineTest on the GPU, computes the same sine on the
 * CPU, and prints log10 of the absolute difference for each pair of results.
 * A more negative number means the two values agree to more decimal digits;
 * if two values are bit-identical the difference is 0 and log10 yields -inf.
 *
 * Returns 0 on success, 1 if any CUDA call fails (details go to stderr).
 */
int main(int argc,char **argv)
{
    float out[3];            /* [0] GPU sinf, [1] GPU __sinf, [2] CPU sin */
    float *gpu_out = NULL;
    cudaError_t freeErr;
    int ok;

    (void)argc;
    (void)argv;

    if (!cudaSucceeded(cudaMalloc((void**)&gpu_out, sizeof(float) * 2), "cudaMalloc"))
        return 1;

    /* The kernel's results do not depend on the thread, so one thread suffices. */
    sineTest<<<1, 1>>>(gpu_out);
    /* Launches do not return a status; configuration errors surface here. */
    ok = cudaSucceeded(cudaGetLastError(), "sineTest launch");

    /* CPU reference, computed while the kernel runs asynchronously. */
    out[2] = sin(0.5);

    /* cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize. */
    ok = ok && cudaSucceeded(cudaDeviceSynchronize(), "sineTest execution");
    /* Size the copy by the element type actually stored (float, not int). */
    ok = ok && cudaSucceeded(
        cudaMemcpy(out, gpu_out, sizeof(float) * 2, cudaMemcpyDeviceToHost),
        "cudaMemcpy");

    /* Always release the device buffer; only report its status if nothing
       failed earlier, to avoid a duplicate message for the same error. */
    freeErr = cudaFree(gpu_out);
    if (ok)
        ok = cudaSucceeded(freeErr, "cudaFree");
    if (!ok)
        return 1;

    printf("GPU vs GPU-fast: %f\n", log10(fabs(out[0]-out[1])));
    printf("GPU vs CPU: %f\n", log10(fabs(out[0]-out[2])));
    printf("GPU-fast vs CPU: %f\n", log10(fabs(out[1]-out[2])));
    return 0;
}