本文共 1006 字,大约阅读时间需要 3 分钟。
#define CUDACHECK(cmd) do { \ cudaError_t e = cmd; \ if( e != cudaSuccess ) { \ printf("Failed: Cuda error %s:%d '%s'\n", \ __FILE__,__LINE__,cudaGetErrorString(e)); \ exit(1); \ } \} while(0)int nDev=2; float** sendbuff = (float**)malloc(nDev * sizeof(float*)); float** recvbuff = (float**)malloc(nDev * sizeof(float*)); cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev); int size=4; for (int i = 0; i < nDev; ++i) { CUDACHECK(cudaSetDevice(localRank*nDev + i)); CUDACHECK(cudaMalloc(sendbuff + i, size * sizeof(float))); CUDACHECK(cudaMalloc(recvbuff + i, size * sizeof(float))); CUDACHECK(cudaMemset(sendbuff[i], 1, size * sizeof(float))); CUDACHECK(cudaMemset(recvbuff[i], 0, size * sizeof(float))); float *h_arr; h_arr = (float *)malloc(size*sizeof(float)); for (int i=0; i
转载地址:http://nzwmi.baihongyu.com/