向量相加。請以「nvcc vecAdd01.cu -Xcompiler "/wd 4819"」指令編譯;之後的範例除了更換檔名以外,編譯方式原則上皆相同:
#include <stdio.h>
#include <time.h>
#include <stdlib.h>

/* Number of elements in each vector. */
#define SIZE 100000

/* Threads per block for the vecAdd launch; a multiple of the 32-thread warp. */
#define THREADS_PER_BLOCK 256

/*
 * Evaluate a CUDA runtime call and abort the program with a diagnostic
 * (file, line, CUDA error string) if it does not return cudaSuccess.
 */
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_status_ = (call);                                   \
        if (cuda_status_ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(cuda_status_));                       \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for 0 <= i < len.
 *
 * Expects a 1D grid of 1D blocks; each thread handles at most one element,
 * so the launch must supply at least `len` threads in total. Threads whose
 * global index falls at or beyond `len` do nothing.
 *
 * a, b : input arrays (read only), at least `len` ints each
 * c    : output array, at least `len` ints
 * len  : number of elements to process
 */
__global__ static void vecAdd(const int *a, const int *b, int *c, int len)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < len) {
        c[idx] = a[idx] + b[idx];
    }
}

/* Host data: inputs a and b, CPU reference result c1, GPU result c2. */
int a[SIZE], b[SIZE], c1[SIZE], c2[SIZE];

/*
 * Fills a and b with random values in [0, 99], computes the reference sum on
 * the CPU, runs vecAdd on the GPU, and prints the accumulated absolute
 * difference between the two results (0 means they agree).
 *
 * Returns 0 on completion; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main()
{
    int *gpu_a = NULL, *gpu_b = NULL, *gpu_c = NULL;
    int i, err = 0;
    const size_t bytes = sizeof(int) * SIZE;
    /* Ceiling division so the grid covers SIZE even when it is not a
       multiple of THREADS_PER_BLOCK. */
    const int blocks = (SIZE + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;

    srand((unsigned int)time(NULL));
    for (i = 0; i < SIZE; i++) {
        a[i] = rand() % 100;
        b[i] = rand() % 100;
        c1[i] = a[i] + b[i];
    }

    CUDA_CHECK(cudaMalloc((void**)&gpu_a, bytes));
    CUDA_CHECK(cudaMalloc((void**)&gpu_b, bytes));
    CUDA_CHECK(cudaMalloc((void**)&gpu_c, bytes));

    CUDA_CHECK(cudaMemcpy(gpu_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(gpu_b, b, bytes, cudaMemcpyHostToDevice));

    vecAdd<<<blocks, THREADS_PER_BLOCK>>>(gpu_a, gpu_b, gpu_c, SIZE);
    /* A kernel launch returns no status itself: query launch errors
       explicitly, then synchronize to surface errors raised while the
       kernel was executing. */
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    CUDA_CHECK(cudaMemcpy(c2, gpu_c, bytes, cudaMemcpyDeviceToHost));

    for (i = 0; i < SIZE; i++) {
        err += abs(c1[i] - c2[i]);
    }
    printf("Err: %d", err);

    CUDA_CHECK(cudaFree(gpu_a));
    CUDA_CHECK(cudaFree(gpu_b));
    CUDA_CHECK(cudaFree(gpu_c));

    return 0;
}