CUDA,day-2,共享内存-排序算法
2016-03-17 09:32
169 查看
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdio.h>
using namespace std;
#define u32 unsigned int
// Forward declaration of the two-scratch-buffer per-thread radix sort
// (defined further down in this file).
//
// For each of the 32 bit positions, lowest first, the calling thread walks
// the elements sort_tmp[i + tid] for i = 0, num_lists, 2 * num_lists, ...
// while i < num_elements, and splits them by that bit without changing their
// relative order: values with the bit clear are staged in sort_tmp_0, values
// with the bit set in sort_tmp_1, and both are then copied back into sort_tmp
// (bit-clear values first, bit-set values after them).
//
//   sort_tmp      data being sorted; rewritten on every pass
//   num_lists     stride between consecutive elements visited by one call
//   num_elements  upper bound for the loop index i
//   tid           offset added to every index this call touches
//   sort_tmp_0    scratch for elements whose current bit is 0
//   sort_tmp_1    scratch for elements whose current bit is 1
//
// NOTE(review): presumably called by num_lists threads with tid = threadIdx.x
// and with the buffers in shared memory -- confirm against the calling
// kernel, which is not visible here.
__device__ void func1(
u32 * const sort_tmp,
const u32 num_lists,
const u32 num_elements,
const u32 tid,
u32 * const sort_tmp_0,
u32 * const sort_tmp_1);
// Forward declaration of the single-scratch-buffer variant of func1
// (defined further down in this file).
//
// The walk over sort_tmp[i + tid] (i = 0, num_lists, ... while
// i < num_elements) is the same, but elements whose current bit is clear are
// compacted in place at the front of the list in sort_tmp; only elements
// whose bit is set are staged in sort_tmp_0 and afterwards copied back behind
// the compacted ones.
//
//   sort_tmp      data being sorted; rewritten on every pass
//   num_lists     stride between consecutive elements visited by one call
//   num_elements  upper bound for the loop index i
//   tid           offset added to every index this call touches
//   sort_tmp_0    scratch for elements whose current bit is 1
//
// NOTE(review): presumably called by num_lists threads with tid = threadIdx.x
// -- confirm against the calling kernel, which is not visible here.
__device__ void func2(
u32 * const sort_tmp,
const u32 num_lists,
const u32 num_elements,
const u32 tid,
u32 * const sort_tmp_0);
// Host entry point. It only returns 0: no kernel is launched and neither
// func1 nor func2 is invoked from here.
int main()
{
return 0;
}
// Per-thread binary radix sort (least significant bit first) that uses two
// scratch buffers.
//
// The calling thread owns the strided list sort_tmp[tid],
// sort_tmp[tid + num_lists], sort_tmp[tid + 2 * num_lists], ... limited to
// indices below num_elements. For each of the 32 bit positions the list is
// split, keeping relative order, into values with the bit clear (staged in
// sort_tmp_0) and values with the bit set (staged in sort_tmp_1); the zeros
// are then copied back first and the ones after them. After the last pass
// the list is in ascending order.
//
//   sort_tmp      data to sort; overwritten with the sorted list
//   num_lists     stride between consecutive elements of one list
//   num_elements  number of valid entries in sort_tmp
//   tid           index of the list handled by this call; expected to be
//                 smaller than num_lists so that lists do not overlap
//   sort_tmp_0    scratch, same layout as sort_tmp, for bit == 0 elements
//   sort_tmp_1    scratch, same layout as sort_tmp, for bit == 1 elements
//
// The function contains no barrier. If other threads read the lists
// afterwards, the caller has to synchronise (e.g. __syncthreads()) first.
__device__ void func1(
    u32 * const sort_tmp,
    const u32 num_lists,
    const u32 num_elements,
    const u32 tid,
    u32 * const sort_tmp_0,
    u32 * const sort_tmp_1)
{
    // A zero stride would never advance the loops below.
    if (num_lists == 0)
    {
        return;
    }

    for (u32 bit = 0; bit < 32; bit++)
    {
        // Unsigned literal: shifting a signed 1 into bit 31 is not portable.
        // The mask is constant for the whole pass, so build it once here.
        const u32 bit_mask = (1u << bit);

        // Both counters advance in steps of num_lists, i.e. they are offsets
        // into the strided layout rather than plain element counts.
        u32 base_cnt_0 = 0;
        u32 base_cnt_1 = 0;

        // Starting at tid and testing the full index keeps the last, possibly
        // partial, row from being read past num_elements.
        for (u32 idx = tid; idx < num_elements; idx += num_lists)
        {
            const u32 elem = sort_tmp[idx];
            if ((elem & bit_mask) != 0)
            {
                sort_tmp_1[base_cnt_1 + tid] = elem;
                base_cnt_1 += num_lists;
            }
            else
            {
                sort_tmp_0[base_cnt_0 + tid] = elem;
                base_cnt_0 += num_lists;
            }
        }

        // Copy the zeros back to the front of the list ...
        for (u32 i = 0; i < base_cnt_0; i += num_lists)
        {
            sort_tmp[i + tid] = sort_tmp_0[i + tid];
        }

        // ... and the ones directly behind them.
        for (u32 i = 0; i < base_cnt_1; i += num_lists)
        {
            sort_tmp[base_cnt_0 + i + tid] = sort_tmp_1[i + tid];
        }
    }
}
// Per-thread binary radix sort (least significant bit first) that needs only
// one scratch buffer.
//
// The calling thread owns the strided list sort_tmp[tid],
// sort_tmp[tid + num_lists], sort_tmp[tid + 2 * num_lists], ... limited to
// indices below num_elements. For each of the 32 bit positions, values with
// the bit clear are compacted in place at the front of the list, values with
// the bit set are staged in sort_tmp_0 and then copied back behind the zeros.
// After the last pass the list is in ascending order.
//
//   sort_tmp      data to sort; overwritten with the sorted list
//   num_lists     stride between consecutive elements of one list
//   num_elements  number of valid entries in sort_tmp
//   tid           index of the list handled by this call; expected to be
//                 smaller than num_lists so that lists do not overlap
//   sort_tmp_0    scratch, same layout as sort_tmp, for bit == 1 elements
//
// The function contains no barrier. If other threads read the lists
// afterwards, the caller has to synchronise (e.g. __syncthreads()) first.
__device__ void func2(
    u32 * const sort_tmp,
    const u32 num_lists,
    const u32 num_elements,
    const u32 tid,
    u32 * const sort_tmp_0)
{
    // A zero stride would never advance the loops below.
    if (num_lists == 0)
    {
        return;
    }

    for (u32 bit = 0; bit < 32; bit++)
    {
        // Unsigned literal: shifting a signed 1 into bit 31 is not portable.
        const u32 bit_mask = (1u << bit);

        // Both counters advance in steps of num_lists, i.e. they are offsets
        // into the strided layout rather than plain element counts.
        u32 base_cnt_0 = 0;
        u32 base_cnt_1 = 0;

        // Starting at tid and testing the full index keeps the last, possibly
        // partial, row from being read past num_elements.
        for (u32 idx = tid; idx < num_elements; idx += num_lists)
        {
            const u32 elem = sort_tmp[idx];
            if ((elem & bit_mask) != 0)
            {
                sort_tmp_0[base_cnt_1 + tid] = elem;
                base_cnt_1 += num_lists;
            }
            else
            {
                // In-place compaction is safe: the write position never runs
                // ahead of the read position.
                sort_tmp[base_cnt_0 + tid] = elem;
                base_cnt_0 += num_lists;
            }
        }

        // Append the staged ones behind the zeros. The bound has to be the
        // number of ones (base_cnt_1); bounding by base_cnt_0 drops elements
        // or copies stale scratch data whenever the two counts differ.
        for (u32 i = 0; i < base_cnt_1; i += num_lists)
        {
            sort_tmp[base_cnt_0 + i + tid] = sort_tmp_0[i + tid];
        }
    }
}
#include "device_launch_parameters.h"
#include <iostream>
#include <stdio.h>
using namespace std;
#define u32 unsigned int
// Forward declaration of the two-scratch-buffer per-thread radix sort
// (defined further down in this file).
//
// For each of the 32 bit positions, lowest first, the calling thread walks
// the elements sort_tmp[i + tid] for i = 0, num_lists, 2 * num_lists, ...
// while i < num_elements, and splits them by that bit without changing their
// relative order: values with the bit clear are staged in sort_tmp_0, values
// with the bit set in sort_tmp_1, and both are then copied back into sort_tmp
// (bit-clear values first, bit-set values after them).
//
//   sort_tmp      data being sorted; rewritten on every pass
//   num_lists     stride between consecutive elements visited by one call
//   num_elements  upper bound for the loop index i
//   tid           offset added to every index this call touches
//   sort_tmp_0    scratch for elements whose current bit is 0
//   sort_tmp_1    scratch for elements whose current bit is 1
//
// NOTE(review): presumably called by num_lists threads with tid = threadIdx.x
// and with the buffers in shared memory -- confirm against the calling
// kernel, which is not visible here.
__device__ void func1(
u32 * const sort_tmp,
const u32 num_lists,
const u32 num_elements,
const u32 tid,
u32 * const sort_tmp_0,
u32 * const sort_tmp_1);
// Forward declaration of the single-scratch-buffer variant of func1
// (defined further down in this file).
//
// The walk over sort_tmp[i + tid] (i = 0, num_lists, ... while
// i < num_elements) is the same, but elements whose current bit is clear are
// compacted in place at the front of the list in sort_tmp; only elements
// whose bit is set are staged in sort_tmp_0 and afterwards copied back behind
// the compacted ones.
//
//   sort_tmp      data being sorted; rewritten on every pass
//   num_lists     stride between consecutive elements visited by one call
//   num_elements  upper bound for the loop index i
//   tid           offset added to every index this call touches
//   sort_tmp_0    scratch for elements whose current bit is 1
//
// NOTE(review): presumably called by num_lists threads with tid = threadIdx.x
// -- confirm against the calling kernel, which is not visible here.
__device__ void func2(
u32 * const sort_tmp,
const u32 num_lists,
const u32 num_elements,
const u32 tid,
u32 * const sort_tmp_0);
// Host entry point. It only returns 0: no kernel is launched and neither
// func1 nor func2 is invoked from here.
int main()
{
return 0;
}
// Per-thread binary radix sort (least significant bit first) that uses two
// scratch buffers.
//
// The calling thread owns the strided list sort_tmp[tid],
// sort_tmp[tid + num_lists], sort_tmp[tid + 2 * num_lists], ... limited to
// indices below num_elements. For each of the 32 bit positions the list is
// split, keeping relative order, into values with the bit clear (staged in
// sort_tmp_0) and values with the bit set (staged in sort_tmp_1); the zeros
// are then copied back first and the ones after them. After the last pass
// the list is in ascending order.
//
//   sort_tmp      data to sort; overwritten with the sorted list
//   num_lists     stride between consecutive elements of one list
//   num_elements  number of valid entries in sort_tmp
//   tid           index of the list handled by this call; expected to be
//                 smaller than num_lists so that lists do not overlap
//   sort_tmp_0    scratch, same layout as sort_tmp, for bit == 0 elements
//   sort_tmp_1    scratch, same layout as sort_tmp, for bit == 1 elements
//
// The function contains no barrier. If other threads read the lists
// afterwards, the caller has to synchronise (e.g. __syncthreads()) first.
__device__ void func1(
    u32 * const sort_tmp,
    const u32 num_lists,
    const u32 num_elements,
    const u32 tid,
    u32 * const sort_tmp_0,
    u32 * const sort_tmp_1)
{
    // A zero stride would never advance the loops below.
    if (num_lists == 0)
    {
        return;
    }

    for (u32 bit = 0; bit < 32; bit++)
    {
        // Unsigned literal: shifting a signed 1 into bit 31 is not portable.
        // The mask is constant for the whole pass, so build it once here.
        const u32 bit_mask = (1u << bit);

        // Both counters advance in steps of num_lists, i.e. they are offsets
        // into the strided layout rather than plain element counts.
        u32 base_cnt_0 = 0;
        u32 base_cnt_1 = 0;

        // Starting at tid and testing the full index keeps the last, possibly
        // partial, row from being read past num_elements.
        for (u32 idx = tid; idx < num_elements; idx += num_lists)
        {
            const u32 elem = sort_tmp[idx];
            if ((elem & bit_mask) != 0)
            {
                sort_tmp_1[base_cnt_1 + tid] = elem;
                base_cnt_1 += num_lists;
            }
            else
            {
                sort_tmp_0[base_cnt_0 + tid] = elem;
                base_cnt_0 += num_lists;
            }
        }

        // Copy the zeros back to the front of the list ...
        for (u32 i = 0; i < base_cnt_0; i += num_lists)
        {
            sort_tmp[i + tid] = sort_tmp_0[i + tid];
        }

        // ... and the ones directly behind them.
        for (u32 i = 0; i < base_cnt_1; i += num_lists)
        {
            sort_tmp[base_cnt_0 + i + tid] = sort_tmp_1[i + tid];
        }
    }
}
// Per-thread binary radix sort (least significant bit first) that needs only
// one scratch buffer.
//
// The calling thread owns the strided list sort_tmp[tid],
// sort_tmp[tid + num_lists], sort_tmp[tid + 2 * num_lists], ... limited to
// indices below num_elements. For each of the 32 bit positions, values with
// the bit clear are compacted in place at the front of the list, values with
// the bit set are staged in sort_tmp_0 and then copied back behind the zeros.
// After the last pass the list is in ascending order.
//
//   sort_tmp      data to sort; overwritten with the sorted list
//   num_lists     stride between consecutive elements of one list
//   num_elements  number of valid entries in sort_tmp
//   tid           index of the list handled by this call; expected to be
//                 smaller than num_lists so that lists do not overlap
//   sort_tmp_0    scratch, same layout as sort_tmp, for bit == 1 elements
//
// The function contains no barrier. If other threads read the lists
// afterwards, the caller has to synchronise (e.g. __syncthreads()) first.
__device__ void func2(
    u32 * const sort_tmp,
    const u32 num_lists,
    const u32 num_elements,
    const u32 tid,
    u32 * const sort_tmp_0)
{
    // A zero stride would never advance the loops below.
    if (num_lists == 0)
    {
        return;
    }

    for (u32 bit = 0; bit < 32; bit++)
    {
        // Unsigned literal: shifting a signed 1 into bit 31 is not portable.
        const u32 bit_mask = (1u << bit);

        // Both counters advance in steps of num_lists, i.e. they are offsets
        // into the strided layout rather than plain element counts.
        u32 base_cnt_0 = 0;
        u32 base_cnt_1 = 0;

        // Starting at tid and testing the full index keeps the last, possibly
        // partial, row from being read past num_elements.
        for (u32 idx = tid; idx < num_elements; idx += num_lists)
        {
            const u32 elem = sort_tmp[idx];
            if ((elem & bit_mask) != 0)
            {
                sort_tmp_0[base_cnt_1 + tid] = elem;
                base_cnt_1 += num_lists;
            }
            else
            {
                // In-place compaction is safe: the write position never runs
                // ahead of the read position.
                sort_tmp[base_cnt_0 + tid] = elem;
                base_cnt_0 += num_lists;
            }
        }

        // Append the staged ones behind the zeros. The bound has to be the
        // number of ones (base_cnt_1); bounding by base_cnt_0 drops elements
        // or copies stale scratch data whenever the two counts differ.
        for (u32 i = 0; i < base_cnt_1; i += num_lists)
        {
            sort_tmp[base_cnt_0 + i + tid] = sort_tmp_0[i + tid];
        }
    }
}
相关文章推荐
- CUDA,day-2,合并列表
- HDU1686:Oulipo
- android开发学习3
- iOS中UIScrollView的用法及基于AutoLayout的控件悬停
- 大数据应用开发八大基本原则
- CUDA,day-2,二维数组操作
- 递推_斐波拉契数列
- xml和json数据的比较
- iOS蓝牙开发
- CUDA,day-1,一维数组
- 乱码
- 文件加解密方法
- 警告框和操作表的使用方法
- Linux笔记(50)——条件判断式语句
- 机器学习笔记—线性回归
- HBase连接池 -- HTablePool被Deprecated之后
- 树莓派搭建花生壳
- 项目生命周期有哪些类型?分别适用于什么情况下?
- linux第一次读书笔记
- BOM