您的位置：首页 > 其它

在GPU核函数中，为什么数值型变量可以和数组元素相互赋值，而在CPU中却不可以？

2017-11-10 07:27 246 查看

/*
* 源程序来自于 炼数成金教程
* 在GPU核函数中，为什么数值型变量可以和数组型变量相互赋值, CPU中不可以
*
* */
#include <iostream>
using namespace std;

/*
 * Copies d_in to d_out element by element: the thread whose index within
 * its block is threadIdx.x copies element threadIdx.x.
 *
 * Launch layout: the kernel indexes with threadIdx.x only (blockIdx is not
 * used), so with more than one block every block would touch the same
 * elements. The caller in this file launches it as <<<1, ARRAY_SIZE>>>.
 *
 * Preconditions: d_in and d_out must each be valid for at least blockDim.x
 * floats; there is no bounds check because no element count is passed in.
 *
 * d_out : destination array, written at index threadIdx.x.
 * d_in  : source array, read at index threadIdx.x.
 */
__global__ void global_scan(float* d_out,float* d_in){
    int idx = threadIdx.x;

    // `out` is an ordinary scalar, NOT an array. Every thread that executes
    // the kernel gets its own private copy of each local variable, so the
    // launched threads hold that many independent `out` values. The right-hand
    // side d_in[idx] is a single float element (not the array itself), which
    // is why the assignment compiles, and why no thread overwrites another
    // thread's `out`.
    float out = d_in[idx];

    // No __syncthreads() is needed: the kernel uses no shared memory and each
    // thread reads and writes only its own element, so there is nothing for
    // the threads of the block to wait on.
    d_out[idx] = out;
}

// Reports a failed CUDA runtime call on stderr.
// status : value returned by the CUDA runtime call.
// what   : short description of the call, used in the diagnostic.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status,const char* what){
    if(status != cudaSuccess){
        cerr<<"CUDA error during "<<what<<": "<<cudaGetErrorString(status)<<endl;
        return false;
    }
    return true;
}

// Host driver: fills an 8-element array with 0..7, copies it to the device,
// runs global_scan with one block of ARRAY_SIZE threads, copies the result
// back and prints it. Returns 0 on success and 1 if any CUDA call failed.
int main(int argc,char** argv){
    const int ARRAY_SIZE = 8;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // generate the input array on the host
    float h_in[ARRAY_SIZE];
    for(int i=0;i<ARRAY_SIZE;i++){
        h_in[i] = float(i);
    }
    float h_out[ARRAY_SIZE];

    // declare GPU memory pointers; start them at null so that the cudaFree
    // calls below are harmless if an allocation never happened
    float* d_in = 0;
    float* d_out = 0;

    // allocate GPU memory, then transfer the input array to the GPU
    bool ok = cudaOk(cudaMalloc((void**) &d_in,ARRAY_BYTES),"cudaMalloc(d_in)");
    if(ok){
        ok = cudaOk(cudaMalloc((void**) &d_out,ARRAY_BYTES),"cudaMalloc(d_out)");
    }
    if(ok){
        ok = cudaOk(cudaMemcpy(d_in,h_in,ARRAY_BYTES,cudaMemcpyHostToDevice),
                    "cudaMemcpy(host to device)");
    }

    if(ok){
        // launch the kernel: one block, one thread per array element
        global_scan<<<1,ARRAY_SIZE>>>(d_out,d_in);

        // a kernel launch returns nothing, so launch errors must be queried
        ok = cudaOk(cudaGetLastError(),"global_scan launch");
    }

    if(ok){
        // copy the result array back to the host; this blocking copy also
        // reports errors raised while the kernel was executing
        ok = cudaOk(cudaMemcpy(h_out,d_out,ARRAY_BYTES,cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }

    if(ok){
        // print out the resulting array (only when h_out really was filled)
        for(int i=0;i<ARRAY_SIZE;i++){
            cout<<h_out[i]<<"  ";
        }
    }

    // free GPU memory allocation on every path
    const bool freedIn = cudaOk(cudaFree(d_in),"cudaFree(d_in)");
    const bool freedOut = cudaOk(cudaFree(d_out),"cudaFree(d_out)");

    return (ok && freedIn && freedOut) ? 0 : 1;
}

/////////////////////////////////////////////////////////

结果如下：

/////////////////////////////////////////////////////////

0 1 2 3 4 5 6 7

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#include<iostream>
using namespace std;

// CPU-side counterpart of the kernel question: shows which assignments
// involving an array are legal in ordinary C++.
int main()
{
    int a[3] = {1, 2, 3};
    int b;

    // b = a;  // does not compile: `a` is a whole array and cannot be assigned
    //         // to an int. The GPU kernel never does this either; it assigns
    //         // one element, d_in[idx], to a float, which is the analogue of
    //         // writing `b = a[0];` here.

    // An array name converts to a pointer to its first element, so `c` now
    // holds the address of a[0] and c[i] refers to the same object as a[i].
    int *c = a;

    cout << "  a[2]= " << a[2];
    cout << "  c[2]= " << c[2];
    cout << endl;
}

/////////////////////////////////////////////////////////

结果如下：

/////////////////////////////////////////////////////////

a[2]= 3 c[2]= 3

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航