您的位置:首页 > 移动开发 > Cocos引擎

cocos2dx 高性能高斯模糊(包含lua接口)

2016-12-07 10:15 330 查看
根据官方的帖子实现的、对当前屏幕内容进行高斯模糊的功能(原文此处为指向该帖子的链接)。

1. 截屏后先缩小图像,以减少需要采样的像素数量。默认缩放因子为 4,即宽和高各缩小到原来的 1/4(像素总数约为原来的 1/16)。

2. 在 C++ 代码中(CPU 端)使用 Stack Blur 算法一次性完成模糊(它是高斯模糊的快速近似),避免使用 shader 进行模糊造成的渲染掉帧。

以下是C++部分代码:

/*
* Gaussian-blur entry point. iScale is the downscale factor: the full-screen
* capture is shrunk to 1/iScale of its size before it is blurred (default 4).
* afterCaptured is invoked with a success flag and the resulting image.
*/
static void gaussianBlur(const std::function<void(bool, cocos2d::Image*)>& afterCaptured, int iScale = 4);

// The Stack Blur Algorithm was invented by Mario Klingemann,
// mario@quasimondo.com and described here:
// http://incubator.quasimondo.com/processing/fast_blur_deluxe.php 
// This is C++ RGBA (32 bit color) multi-threaded version
// by Victor Laskin (victor.laskin@gmail.com)
// More details: http://vitiy.info/stackblur-algorithm-multi-threaded-blur-for-cpp 
// This code is using MVThread class from my cross-platform framework
// You can exchange it with any thread implementation you like
// -------------------------------------- stackblur ----------------------------------------->

// Fixed-point multipliers, indexed by blur radius (0..254).
// stackblurJob normalizes each weighted channel sum with
//   (sum * stackblur_mul[radius]) >> stackblur_shr[radius]
// instead of a division; e.g. for radius 1 this is 512 >> 11, i.e. a factor of 1/4.
static unsigned short const stackblur_mul[255] =
{
512, 512, 456, 512, 328, 456, 335, 512, 405, 328, 271, 456, 388, 335, 292, 512,
454, 405, 364, 328, 298, 271, 496, 456, 420, 388, 360, 335, 312, 292, 273, 512,
482, 454, 428, 405, 383, 364, 345, 328, 312, 298, 284, 271, 259, 496, 475, 456,
437, 420, 404, 388, 374, 360, 347, 335, 323, 312, 302, 292, 282, 273, 265, 512,
497, 482, 468, 454, 441, 428, 417, 405, 394, 383, 373, 364, 354, 345, 337, 328,
320, 312, 305, 298, 291, 284, 278, 271, 265, 259, 507, 496, 485, 475, 465, 456,
446, 437, 428, 420, 412, 404, 396, 388, 381, 374, 367, 360, 354, 347, 341, 335,
329, 323, 318, 312, 307, 302, 297, 292, 287, 282, 278, 273, 269, 265, 261, 512,
505, 497, 489, 482, 475, 468, 461, 454, 447, 441, 435, 428, 422, 417, 411, 405,
399, 394, 389, 383, 378, 373, 368, 364, 359, 354, 350, 345, 341, 337, 332, 328,
324, 320, 316, 312, 309, 305, 301, 298, 294, 291, 287, 284, 281, 278, 274, 271,
268, 265, 262, 259, 257, 507, 501, 496, 491, 485, 480, 475, 470, 465, 460, 456,
451, 446, 442, 437, 433, 428, 424, 420, 416, 412, 408, 404, 400, 396, 392, 388,
385, 381, 377, 374, 370, 367, 363, 360, 357, 354, 350, 347, 344, 341, 338, 335,
332, 329, 326, 323, 320, 318, 315, 312, 310, 307, 304, 302, 299, 297, 294, 292,
289, 287, 285, 282, 280, 278, 275, 273, 271, 269, 267, 265, 263, 261, 259
};

// Right-shift amounts, indexed by blur radius (0..254).
// Paired entry-for-entry with stackblur_mul: stackblurJob computes
//   (sum * stackblur_mul[radius]) >> stackblur_shr[radius]
// to scale each weighted channel sum back into a byte.
static unsigned char const stackblur_shr[255] =
{
9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
};

/// Stackblur algorithm body
void stackblurJob(unsigned char* src,				///< input image data
unsigned int w,					///< image width
unsigned int h,					///< image height
unsigned int radius,				///< blur intensity (should be in 2..254 range)
int cores,						///< total number of working threads
int core,							///< current thread number
int step,							///< step of processing (1,2)
unsigned char* stack				///< stack buffer
)
{
unsigned int x, y, xp, yp, i;
unsigned int sp;
unsigned int stack_start;
unsigned char* stack_ptr;

unsigned char* src_ptr;
unsigned char* dst_ptr;

unsigned long sum_r;
unsigned long sum_g;
unsigned long sum_b;
unsigned long sum_a;
unsigned long sum_in_r;
unsigned long sum_in_g;
unsigned long sum_in_b;
unsigned long sum_in_a;
unsigned long sum_out_r;
unsigned long sum_out_g;
unsigned long sum_out_b;
unsigned long sum_out_a;

unsigned int wm = w - 1;
unsigned int hm = h - 1;
unsigned int w4 = w * 4;
unsigned int div = (radius * 2) + 1;
unsigned int mul_sum = stackblur_mul[radius];
unsigned char shr_sum = stackblur_shr[radius];

if (step == 1)
{
int minY = core * h / cores;
int maxY = (core + 1) * h / cores;

for (y = minY; y < maxY; y++)
{
sum_r = sum_g = sum_b = sum_a =
sum_in_r = sum_in_g = sum_in_b = sum_in_a =
sum_out_r = sum_out_g = sum_out_b = sum_out_a = 0;

src_ptr = src + w4 * y; // start of line (0,y)

for (i = 0; i <= radius; i++)
{
stack_ptr = &stack[4 * i];
stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];
sum_r += src_ptr[0] * (i + 1);
sum_g += src_ptr[1] * (i + 1);
sum_b += src_ptr[2] * (i + 1);
sum_a += src_ptr[3] * (i + 1);
sum_out_r += src_ptr[0];
sum_out_g += src_ptr[1];
sum_out_b += src_ptr[2];
sum_out_a += src_ptr[3];
}

for (i = 1; i <= radius; i++)
{
if (i <= wm) src_ptr += 4;
stack_ptr = &stack[4 * (i + radius)];
stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];
sum_r += src_ptr[0] * (radius + 1 - i);
sum_g += src_ptr[1] * (radius + 1 - i);
sum_b += src_ptr[2] * (radius + 1 - i);
sum_a += src_ptr[3] * (radius + 1 - i);
sum_in_r += src_ptr[0];
sum_in_g += src_ptr[1];
sum_in_b += src_ptr[2];
sum_in_a += src_ptr[3];
}

sp = radius;
xp = radius;
if (xp > wm) xp = wm;
src_ptr = src + 4 * (xp + y * w); //   img.pix_ptr(xp, y);
dst_ptr = src + y * w4; // img.pix_ptr(0, y);
for (x = 0; x < w; x++)
{
dst_ptr[0] = (sum_r * mul_sum) >> shr_sum;
dst_ptr[1] = (sum_g * mul_sum) >> shr_sum;
dst_ptr[2] = (sum_b * mul_sum) >> shr_sum;
dst_ptr[3] = (sum_a * mul_sum) >> shr_sum;
dst_ptr += 4;

sum_r -= sum_out_r;
sum_g -= sum_out_g;
sum_b -= sum_out_b;
sum_a -= sum_out_a;

stack_start = sp + div - radius;
if (stack_start >= div) stack_start -= div;
stack_ptr = &stack[4 * stack_start];

sum_out_r -= stack_ptr[0];
sum_out_g -= stack_ptr[1];
sum_out_b -= stack_ptr[2];
sum_out_a -= stack_ptr[3];

if (xp < wm)
{
src_ptr += 4;
++xp;
}

stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];

sum_in_r += src_ptr[0];
sum_in_g += src_ptr[1];
sum_in_b += src_ptr[2];
sum_in_a += src_ptr[3];
sum_r += sum_in_r;
sum_g += sum_in_g;
sum_b += sum_in_b;
sum_a += sum_in_a;

++sp;
if (sp >= div) sp = 0;
stack_ptr = &stack[sp * 4];

sum_out_r += stack_ptr[0];
sum_out_g += stack_ptr[1];
sum_out_b += stack_ptr[2];
sum_out_a += stack_ptr[3];
sum_in_r -= stack_ptr[0];
sum_in_g -= stack_ptr[1];
sum_in_b -= stack_ptr[2];
sum_in_a -= stack_ptr[3];

}

}
}

// step 2
if (step == 2)
{
int minX = core * w / cores;
int maxX = (core + 1) * w / cores;

for (x = minX; x < maxX; x++)
{
sum_r = sum_g = sum_b = sum_a =
sum_in_r = sum_in_g = sum_in_b = sum_in_a =
sum_out_r = sum_out_g = sum_out_b = sum_out_a = 0;

src_ptr = src + 4 * x; // x,0
for (i = 0; i <= radius; i++)
{
stack_ptr = &stack[i * 4];
stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];
sum_r += src_ptr[0] * (i + 1);
sum_g += src_ptr[1] * (i + 1);
sum_b += src_ptr[2] * (i + 1);
sum_a += src_ptr[3] * (i + 1);
sum_out_r += src_ptr[0];
sum_out_g += src_ptr[1];
sum_out_b += src_ptr[2];
sum_out_a += src_ptr[3];
}
for (i = 1; i <= radius; i++)
{
if (i <= hm) src_ptr += w4; // +stride

stack_ptr = &stack[4 * (i + radius)];
stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];
sum_r += src_ptr[0] * (radius + 1 - i);
sum_g += src_ptr[1] * (radius + 1 - i);
sum_b += src_ptr[2] * (radius + 1 - i);
sum_a += src_ptr[3] * (radius + 1 - i);
sum_in_r += src_ptr[0];
sum_in_g += src_ptr[1];
sum_in_b += src_ptr[2];
sum_in_a += src_ptr[3];
}

sp = radius;
yp = radius;
if (yp > hm) yp = hm;
src_ptr = src + 4 * (x + yp * w); // img.pix_ptr(x, yp);
dst_ptr = src + 4 * x; 			  // img.pix_ptr(x, 0);
for (y = 0; y < h; y++)
{
dst_ptr[0] = (sum_r * mul_sum) >> shr_sum;
dst_ptr[1] = (sum_g * mul_sum) >> shr_sum;
dst_ptr[2] = (sum_b * mul_sum) >> shr_sum;
dst_ptr[3] = (sum_a * mul_sum) >> shr_sum;
dst_ptr += w4;

sum_r -= sum_out_r;
sum_g -= sum_out_g;
sum_b -= sum_out_b;
sum_a -= sum_out_a;

stack_start = sp + div - radius;
if (stack_start >= div) stack_start -= div;
stack_ptr = &stack[4 * stack_start];

sum_out_r -= stack_ptr[0];
sum_out_g -= stack_ptr[1];
sum_out_b -= stack_ptr[2];
sum_out_a -= stack_ptr[3];

if (yp < hm)
{
src_ptr += w4; // stride
++yp;
}

stack_ptr[0] = src_ptr[0];
stack_ptr[1] = src_ptr[1];
stack_ptr[2] = src_ptr[2];
stack_ptr[3] = src_ptr[3];

sum_in_r += src_ptr[0];
sum_in_g += src_ptr[1];
sum_in_b += src_ptr[2];
sum_in_a += src_ptr[3];
sum_r += sum_in_r;
sum_g += sum_in_g;
sum_b += sum_in_b;
sum_a += sum_in_a;

++sp;
if (sp >= div) sp = 0;
stack_ptr = &stack[sp * 4];

sum_out_r += stack_ptr[0];
sum_out_g += stack_ptr[1];
sum_out_b += stack_ptr[2];
sum_out_a += stack_ptr[3];
sum_in_r -= stack_ptr[0];
sum_in_g -= stack_ptr[1];
sum_in_b -= stack_ptr[2];
sum_in_a -= stack_ptr[3];
}
}
}

}

/// Captures the full argument list of one stackblurJob call so that the job
/// can be stored and executed later through run().
class MVImageUtilsStackBlurTask
{
public:
    // The fields mirror the stackblurJob parameters one-to-one.
    unsigned char* src;
    unsigned int w;
    unsigned int h;
    unsigned int radius;
    int cores;
    int core;
    int step;
    unsigned char* stack;

    /// Stores every argument unchanged; nothing is copied or validated here.
    MVImageUtilsStackBlurTask(unsigned char* src, unsigned int w, unsigned int h, unsigned int radius, int cores, int core, int step, unsigned char* stack)
        : src(src)
        , w(w)
        , h(h)
        , radius(radius)
        , cores(cores)
        , core(core)
        , step(step)
        , stack(stack)
    {
    }

    /// Executes the stored job by forwarding the saved arguments to stackblurJob.
    void run()
    {
        stackblurJob(this->src, this->w, this->h, this->radius,
                     this->cores, this->core, this->step, this->stack);
    }
};

/// Stackblur algorithm by Mario Klingemann
/// Details here:
/// http://www.quasimondo.com/StackBlurForCanvas/StackBlurDemo.html
/// C++ implementation based on:
/// https://gist.github.com/benjamin9999/3809142
/// http://www.antigrain.com/__code/include/agg_blur.h.html
/// This version works only with RGBA color.
///
/// Blurs the image in place with a horizontal pass followed by a vertical pass.
/// The call is a no-op when radius is outside 2..254, when src is null, or when
/// the image is empty.
///
/// No thread implementation is wired into this version, so both passes always
/// run on the calling thread and `cores` is accepted only for interface
/// compatibility. Previously any value other than 1 silently skipped the blur.
void stackblur(unsigned char* src,    ///< input image data (RGBA, modified in place)
               unsigned int w,        ///< image width
               unsigned int h,        ///< image height
               unsigned int radius,   ///< blur intensity (should be in 2..254 range)
               int cores = 1          ///< number of threads (1 - normal single thread); currently only a hint
               )
{
    if (radius > 254) return;
    if (radius < 2) return;
    if (src == nullptr || w == 0 || h == 0) return;

    (void)cores;  // see the note above: processing is always single-threaded

    // Scratch stack for one worker. The radius is capped at 254 above, so the
    // largest stack is (2 * 254 + 1) * 4 bytes; a local array avoids the heap.
    unsigned char stack[(254 * 2 + 1) * 4];

    stackblurJob(src, w, h, radius, 1, 0, 1, stack);  // horizontal pass
    stackblurJob(src, w, h, radius, 1, 0, 2, stack);  // vertical pass
}

/**
* Capture screen implementation, don't use it directly.
*
* Reads the current framebuffer, flips it vertically, shrinks it to
* 1/iScale of the frame size in each dimension (nearest-neighbour sampling),
* blurs the result with stackblur (radius 5) and hands it to afterCaptured.
*
* afterCaptured (if set) is always invoked exactly once: with (true, image) on
* success, or with (false, nullptr) when a capture is already running, when the
* frame size / iScale is unusable, or when an allocation fails. The image passed
* to the callback is autoreleased.
*/
void onCaptureScreen(const std::function<void(bool, Image*)>& afterCaptured, int iScale)
{
    static bool startedCapture = false;

    if (startedCapture)
    {
        CCLOG("Screen capture is already working");
        if (afterCaptured)
        {
            afterCaptured(false, nullptr);
        }
        return;
    }
    startedCapture = true;

    // Single way out once the capture has started: report the result and clear
    // the busy flag, so a failure can never leave the function locked forever.
    auto finish = [&afterCaptured](bool succeed, Image* image)
    {
        if (afterCaptured)
        {
            afterCaptured(succeed, image);
        }
        startedCapture = false;
    };

    auto glView = Director::getInstance()->getOpenGLView();
    auto frameSize = glView->getFrameSize();
#if (CC_TARGET_PLATFORM == CC_PLATFORM_MAC) || (CC_TARGET_PLATFORM == CC_PLATFORM_WIN32) || (CC_TARGET_PLATFORM == CC_PLATFORM_LINUX)
    frameSize = frameSize * glView->getFrameZoomFactor() * glView->getRetinaFactor();
#endif

    const int width = static_cast<int>(frameSize.width);
    const int height = static_cast<int>(frameSize.height);

    // A non-positive scale, or one larger than the frame, would make the
    // destination size zero and the ratio computation below divide by zero.
    if (iScale <= 0 || width <= 0 || height <= 0 || width / iScale == 0 || height / iScale == 0)
    {
        CCLOG("Screen capture: invalid frame size %dx%d or scale %d", width, height, iScale);
        finish(false, nullptr);
        return;
    }

    const unsigned long src_width = static_cast<unsigned long>(width);
    const unsigned long src_height = static_cast<unsigned long>(height);
    const unsigned long scale = static_cast<unsigned long>(iScale);
    const unsigned long dst_width = src_width / scale;
    const unsigned long dst_height = src_height / scale;

    // nothrow new, so that the null checks below are actually reachable.
    std::shared_ptr<GLubyte> buffer(new (std::nothrow) GLubyte[src_width * src_height * 4], [](GLubyte* p) { delete[] p; });
    std::shared_ptr<GLubyte> zipFlippedBuffer(new (std::nothrow) GLubyte[dst_width * dst_height * 4], [](GLubyte* p) { delete[] p; });
    if (!buffer || !zipFlippedBuffer)
    {
        CCLOG("Screen capture: failed to allocate pixel buffers");
        finish(false, nullptr);
        return;
    }

    glPixelStorei(GL_PACK_ALIGNMENT, 1);
    glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buffer.get());

    /*------------- downscale start ------------*/
    // 16.16 fixed-point source step per destination pixel.
    const unsigned long xrIntFloat_16 = (src_width << 16) / dst_width + 1;
    const unsigned long yrIntFloat_16 = (src_height << 16) / dst_height + 1;

    const unsigned long byte_width = 4;  // physical size of one pixel: 4 bytes
    const unsigned long src_stride = src_width * byte_width;
    const unsigned long dst_stride = dst_width * byte_width;

    GLubyte* dstLine = zipFlippedBuffer.get();
    unsigned long srcy_16 = 0;

    for (unsigned long y = 0; y < dst_height; ++y)
    {
        // glReadPixels returns rows bottom-up. Reading source row
        // (height - 1 - r) performs the vertical flip while sampling, so no
        // full-size flipped copy of the frame is needed.
        const unsigned long flippedRow = srcy_16 >> 16;
        const GLubyte* pSrcLine = buffer.get() + (src_height - 1 - flippedRow) * src_stride;

        unsigned long srcx_16 = 0;
        for (unsigned long x = 0; x < dst_width; ++x)
        {
            memcpy(dstLine + x * byte_width, pSrcLine + (srcx_16 >> 16) * byte_width, byte_width);
            srcx_16 += xrIntFloat_16;
        }
        srcy_16 += yrIntFloat_16;
        dstLine += dst_stride;
    }
    /*------------- downscale end ------------*/

    // Blur the small image once on the CPU.
    stackblur(zipFlippedBuffer.get(), dst_width, dst_height, 5);

    Image* image = new (std::nothrow) Image;
    if (!image)
    {
        CCLOG("Malloc Image memory failed!");
        finish(false, nullptr);
        return;
    }
    // Autorelease first so the image is reclaimed on the failure path as well.
    image->autorelease();

    if (!image->initWithRawData(zipFlippedBuffer.get(), dst_width * dst_height * 4, dst_width, dst_height, 8))
    {
        CCLOG("Screen capture: Image::initWithRawData failed");
        finish(false, nullptr);
        return;
    }

    finish(true, image);
}

/*
* Gaussian-blur entry point. iScale is the downscale factor: the full-screen
* capture is shrunk to 1/iScale of its size before it is blurred.
*
* The capture itself is deferred: a one-shot EVENT_AFTER_DRAW listener queues a
* render command that runs onCaptureScreen, which then invokes afterCaptured.
* If the request is rejected here (iScale <= 0, or another request is already
* pending for this frame) afterCaptured is invoked immediately with
* (false, nullptr), matching how onCaptureScreen reports a busy capture.
*/
static EventListenerCustom* s_captureScreenListener = nullptr;
static CustomCommand s_captureScreenCommand;
void Util::gaussianBlur(const std::function<void(bool, Image*)>& afterCaptured, int iScale /*= 4*/)
{
    if (iScale <= 0)
    {
        // A non-positive factor would lead to a division by zero while downscaling.
        CCLOG("Warning: gaussianBlur called with invalid scale factor %d.", iScale);
        if (afterCaptured)
        {
            afterCaptured(false, nullptr);
        }
        return;
    }

    if (s_captureScreenListener)
    {
        CCLOG("Warning: CaptureScreen has been called already, don't call more than once in one frame.");
        if (afterCaptured)
        {
            afterCaptured(false, nullptr);
        }
        return;
    }

    // Largest possible global order for the capture command.
    s_captureScreenCommand.init(std::numeric_limits<float>::max());
    // The callback is copied into the command, so it stays valid after this call returns.
    s_captureScreenCommand.func = [afterCaptured, iScale]() { onCaptureScreen(afterCaptured, iScale); };

    s_captureScreenListener = Director::getInstance()->getEventDispatcher()->addCustomEventListener(Director::EVENT_AFTER_DRAW, [](EventCustom* /*event*/) {
        auto director = Director::getInstance();
        // One-shot listener: unregister first, then queue and flush the capture command.
        director->getEventDispatcher()->removeEventListener(s_captureScreenListener);
        s_captureScreenListener = nullptr;
        director->getRenderer()->addCommand(&s_captureScreenCommand);
        director->getRenderer()->render();
    });
}


以下是导出的lua接口:

#include "base/ccConfig.h"
#ifndef __game_custom_h__
#define __game_custom_h__

#ifdef __cplusplus
extern "C" {
#endif
#include "tolua++.h"
#ifdef __cplusplus
}
#endif

// Registers the custom game bindings (the pf.Common module and its GaussianBlur function) on the given Lua state.
int register_all_game_custom(lua_State* tolua_S);

#endif // __game_custom_h__


// Lua binding for Util::gaussianBlur, exposed as pf.Common:GaussianBlur(callback [, scale]).
//
// Lua stack layout: 1 = receiver (pf.Common), 2 = callback function,
// 3 = optional integer scale factor. The callback is called as
// callback(succeed, image); image is a cc.Image on success and nil otherwise.
// Returns no values to Lua.
static int tolua_pf_common_gaussianBlur(lua_State* tolua_S)
{
    const int argc = lua_gettop(tolua_S) - 1;  // arguments excluding the receiver
    const bool hasScale = (argc == 2);

    // Validate the scale before taking a reference to the callback, so a bad
    // argument cannot leak the function reference.
    int q = 4;
    if (hasScale && !luaval_to_int32(tolua_S, 3, &q))
    {
        CCLOG("tolua_pf_common_gaussianBlur : luaval_to_number , error");
        return 0;
    }

    LUA_FUNCTION callbackHandler = toluafix_ref_function(tolua_S, 2, 0);
    if (callbackHandler == 0)
    {
        CCLOG("tolua_pf_common_gaussianBlur : toluafix_ref_function , error");
        return 0;
    }

    auto capture_callback = [callbackHandler](bool succeed, Image* img) {
        auto luastack = LuaEngine::getInstance()->getLuaStack();

        luastack->pushBoolean(succeed);
        if (succeed)
        {
            luastack->pushObject(img, "cc.Image");
        }
        else
        {
            luastack->pushNil();
        }
        luastack->executeFunctionByHandler(callbackHandler, 2);

        // The callback fires once per request; release the Lua function
        // reference afterwards, otherwise every call leaks one reference.
        luastack->removeScriptHandler(callbackHandler);
    };

    if (hasScale)
    {
        Util::gaussianBlur(capture_callback, q);
    }
    else
    {
        Util::gaussianBlur(capture_callback);
    }
    return 0;
}

// Registers the custom bindings on the given Lua state: module "pf", nested
// module "Common", and the function "GaussianBlur" inside it. Always returns 1.
TOLUA_API int register_all_game_custom(lua_State* tolua_S)
{
    tolua_open(tolua_S);

    // Outer module: pf
    tolua_module(tolua_S, "pf", 0);
    tolua_beginmodule(tolua_S, "pf");

    // Nested module: pf.Common
    tolua_module(tolua_S, "Common", 0);
    tolua_beginmodule(tolua_S, "Common");
    tolua_function(tolua_S, "GaussianBlur", tolua_pf_common_gaussianBlur);
    tolua_endmodule(tolua_S);  // closes Common

    tolua_endmodule(tolua_S);  // closes pf

    return 1;
}


使用方法:

-- Texture-cache key under which the blurred screenshot is stored.
local BLUR_TEXTURE_KEY = "capriteadu"

-- Callback for pf.Common:GaussianBlur.
-- ret: true when the capture succeeded; img: the blurred cc.Image (nil on failure).
-- On success the image is shown as a sprite centred on the window and added to `self`
-- (`self` is expected to be the enclosing node in the surrounding scope).
local function onFinishCapture(ret, img)
    if not ret then
        return
    end

    local textureCache = cc.Director:getInstance():getTextureCache()
    -- addImage(image, key) returns the texture already cached under the key, so
    -- without this removal every later capture would show the first screenshot.
    textureCache:removeTextureForKey(BLUR_TEXTURE_KEY)
    local texture = textureCache:addImage(img, BLUR_TEXTURE_KEY)

    local spriteBlur = cc.Sprite:createWithTexture(texture)
    local wSize = cc.Director:getInstance():getWinSize()
    spriteBlur:setPosition(cc.p(wSize.width/2, wSize.height/2))
    self:addChild(spriteBlur)
    -- The capture is 1/4 of the screen size, so stretch the sprite back to the window size.
    PF.UIEx.nodeToScaleForFixedSize(spriteBlur, wSize)
end
pf.Common:GaussianBlur(onFinishCapture, 4)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息