PSP《大众高尔夫2P》XB资源包算法分析(3)
2010-08-01 11:11
134 查看
反编译过程
这时需要完成的工作是用C语言将汇编代码的行为进行模拟。一本MIPS指令集手册是必须的,可以通过下面的URL获得:http://dkrizanc.web.wesleyan.edu/courses/231/07/MIPS_Vol2.pdf
手册名为:《MIPS32™ Architecture For Programmers Volume II: The MIPS32™ Instruction Set》
当然也有工具可以帮助我们完成这项工作:
rec,这是一个闭源的工具。
http://www.backerstreet.com/rec/rec.htm
pspdecompiler可以用在prx文件上,我试了下对eboot好像无法使用。其源代码可以通过下面URL获取:http://repo.or.cz/w/pspdecompiler.git
但我们还是建议大家手动地完成这项工作,这样对算法的理解也会有帮助。这里还建议大家使用IDA里的图形视图(Graph view)来阅读汇编代码。
编写模拟代码 下面是我列出来的模拟代码,供大家参考:
1: void lzss_decoder(u8* des, u8* src, int len)
2: {
3: u32 t0, t1, t2, t3;
4: u32 a0, a1, a2, a3, a4;
5: u32 v1;
6: v1 = des+len;
7:
8: if(len<0)
9: {
10: return;
11: }
12:
13: t0 = 1;
14: t1 = *src;
15:
16: loc_882ADEC:
17: a2 = t1 & 3;
18: src ++;
19: if(a2 != 0)
20: {
21: a2 = t1 & 1;
22: if(a2 != t0)
23: {
24: a3 = *src;
25: a2 = *(src+1);
26: a3 = a3 << 8;
27: a2= a2 << 16;
28: a2 = a3 + a2;
29: a3 = t1 + a2;
30: a2 = (a3 << 2) & 0x3ff;
31: t1 = a2 + 3;
32: a2 = a3 >> 12;
33: t2 = a0 - a2;
34: src +=2;
35: }
36: else
37: {
38: a2 = *src;
39: a2 = a2 << 8;
40: a3 = t1 + a2;
41: a2 = (a3 >> 1) & 0x7;
42: t1 = a2 + 3;
43: a2 = a3 >> 4;
44: t2 = des - a2;
45: src++;
46: }
47: a2 = t1;
48: t1 --;
49: if(a2 > 0)
50: {
51: loc_882AE90:
52: a3 = *(u8 *) t2;
53: a2 = t1;
54: t1 --;
55: *dst = a3;
56: t2 ++;
57: dst++;
58: if(a2>0)
59: {
60: goto loc_882AE90;
61: }
62: }
63: loc_882AEAC:
64: a2 = (dst < v1) ? 1 : 0;
65: }
66: else
67: {
68: a2 = t1 >> 2;
69: a2 ++;
70: t1 = a2 -1;
71: if(a2 > 0)
72: {
73: loc_882AE08:
74: a3 = *src;
75: a2 = t1;
76: t1 --;
77: *des = a3;
78: des++;
79: src++;
80: if(a2>0)
81: {
82: goto loc_882AE08;
83: }
84: a2 = (a0 < v1) ? 1 : 0;
85: }
86: else
87: {
88: goto loc_882AEAC;
89: }
90: }
91: if(a2 != 0)
92: {
93: t1 = *src;
94: goto loc_882ADEC;
95: }
96: else
97: {
98: return;
99: }
100: }
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
恢复高级语言特征
下面的工作是恢复高级语言特征。
1. 压缩赋值语句的行数
2. 根据循环条件,将goto语句还原为while或for
1: void lzss_decoder(u8* des, u8* src, int len)
2: {
3: u32 t0, t1, t2, t3;
4: u32 a0, a1, a2, a3, a4;
5: u8 *end, type;
6:
7:end = des + len;
8: if(len<0)
9: {
10: return;
11: }
12:t0 = 1;
13: type = *src;
14:
15: loc_882ADEC:
16: if(0 == (type & 3))
17: {
18: a2 = type >> 2;
19: a2++;
20: for(i=0; i<a2; i++)
21: {
22: *des = *src;
23: des++;
24: src++;
25: }
26: goto loc_882AEAC;
27: }
28: else if(t0 == (type & 1))
29: {
30: a2 = *src<<8 + (*(src+1)<<16);
31: a3 = type + a2;
32: a2 = (a3<<2) & 0x3ff;
33: type = a2 + 3;
34: t2 = des - (a3>>12);
35: src += 2;
36: }
37: else
38: {
39: a2 = (*src << 8);
40: a3 = a2 + type;
41: a2 = (a3 >> 1) & 0x7;
42: type = a2 + 3;
43: a2 = a3 >> 4;
44: t2 = des - a2;
45: src++;
46: }
47: for(i=0; i<type; i++)
48: {
49: *dst = *(u8 *)t2;
50: t2++;
51: dst++;
52: }
53:
54: loc_882AEAC:
55: if(dst < end)
56: {
57: type = *src;
58: goto loc_882ADEC;
59: }
60: return;
61: }
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
增加可读性
下面要做的工作是看懂算法,为变量命名和调整代码结构,使其具有可读性。
最终的代码:
1: void lzss_decoder(u8* dst, u8* src, int dst_len)
2: {
3: u8 *end, *offset, type;
4: u32 len;
5: int i;
6:
7:end = dst + dst_len;
8:
9: if(dst_len<0)
10: {
11: return;
12:}
13:
14: while(dst < end)
15: {
16: type = *src++;
17: if(0 != (type & 0x3)) /* handle compressed data */
18: {
19: if(1 != (type & 0x1))
20: {
21: len = type | (*src<<8) | (*(src+1)<<16);
22: offset = dst - (len>>12);
23: len = ((len<<2) & 0x3ff) + 3;
24: src += 2;
25: }
26: else
27: {
28: len = type | (*src << 8);
29: offset = dst - (len>>4);
30: len = ((len>>1) & 0x7) + 3;
31: src++;
32: }
33: for(i=0; i<len; i++)
34: {
35: *dst++ = *offset++;
36: }
37: }
38: else /* handle uncompressed data */
39: {
40: len = type >> 2;
41: len += 1;
42: for(i=0; i<len; i++)
43: {
44: *dst++ = *src++;
45: }
46: }
47: }
48: return;
49: }
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
算法功能验证
我们还需要验证代码的正确性。这时上面dump的内存文件leaveLzssDecoder.bin就有用了。
1: /* codecVerify.c */
2: #include <stdio.h>
3:
4: typedef unsigned int u32;
5: typedef unsigned short u16;
6: typedef unsigned char u8;
7:
8: int load_psp_memfile(char *path, u32* buf);
9: void mem_check(u8* dst, u8* src, int len);
10: void lzss_decoder(u8* dst, u8* src, int dst_len);
11:
12: #define ADDRMAP(a,b) (b - 0x08800000 + a)
13:
14: int main(int argc, char * argv[])
15: {
16: int ret = 0;
17: u8 *pspmem = NULL;
18: u8 *dst = NULL;
19: ret = load_psp_memfile("leaveLzssDecoder.bin", &pspmem);
20: if(0 != ret)
21: {
22: printf("Open file [%s] failed\n", "leaveLzssDecoder.bin");
23: return ret;
24: }
25: dst = malloc(0x0000010E);
26: memset(dst, 0, 0x0000010E);
27: //a0:0x08B39798 a1:0x08BE4800
28: lzss_decoder(dst, ADDRMAP(pspmem, 0x08BE4800), 0x0000010E);
29: mem_check(dst, ADDRMAP(pspmem, 0x08B39798), 0x0000010E);
30: free(pspmem);
31: return ret;
32: }
33:
34: int load_psp_memfile(char *path, u32* buf)
35: {
36: FILE* fp = NULL;
37: int size;
38:
39: fp = fopen(path, "rb");
40: if(NULL == fp)
41: return -1;
42: fseek(fp, 0, SEEK_END);
43: size = ftell(fp);
44: fseek(fp, 0, SEEK_SET);
45: *buf = malloc(size);
46: fread(*buf, size, 1, fp);
47: fclose(fp);
48: }
49:
50: void mem_check(u8* dst, u8* src, int len)
51: {
52: int i;
53:for(i=0; i<len; i++)
54: {
55: if(*(dst+i) != *(src+i))
56: {
57: printf("memory check failed\n offset[0x%x] dst[0x%x]!=src[0x%x]\n", i, *(dst+i), *(src+i));
58: return;
59: }
60: }
61: printf("memory check pass!\n");
62: }
在这个阶段我发现我将
len = ((len>>2) & 0x3ff) + 3;
这句话误写成了
len = ((len<<2) & 0x3ff) + 3;
从汇编到C模拟,直到最终的验证,其中每个阶段都可能人为地引入错误,所以一旦发现就应及时更正,以免将错误放大到后面的阶段。
算法总结
LZSS算法是一种匹配串的替换算法。其变种主要发生在下面几点:
1. 搜索窗的尺寸
2. 匹配串长度的下限和上限
3. 码字和未压缩数据的区分方式
4. 码字中偏移地址和字串长度的组织方式
WIKI指定的LZSS算法参考代码为Allegro
http://alleg.svn.sourceforge.net/viewvc/alleg/allegro/branches/4.2/src/lzss.c?revision=7522&view=markup
该变种算法中根据偏移地址的范围做了2种不同的编码方式。通过码字最低的2个bits来区分是否经过压缩以及编码的方式。仔细阅读代码可以获得下面的编码参数。
#define N 4095 /* buffers for LZ compression */
#define F1 10 /* upper limit for LZ match length for 16bits */
#define F2 1026 /* upper limit for LZ match length for 24bits */
#define THRESHOLD 2 /* LZ encode string into pos and length */
.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }
有了上面的参数实现encoder也就比较简单了。这里需要指出的是,不同的encoder实现最终产生的压缩信息会不一样,我们不要求和原版一致,但求压缩率以及代码的执行效率。
算法0x20(lzss)
因为loading.xb中只包含0x20这一种算法。所以我们在第一个文件压缩内容的偏移地址设置读断点:0x26*4= 0x98
0x08BE47C0 + 0x98 = 0x08BE4858
bpset 0x08BE4858 r EPC - 0x0882AEDC |
bpset 0x08BE4860 r EPC - 0x0882ADE8 |
经过试验验证,0x20这种lzss和文件名列表部分的压缩采用同样的算法。
相关文章推荐
- PSP《大众高尔夫2P》XB资源包算法分析(1)
- PSP《大众高尔夫2P》XB资源包算法分析(2)
- PSP《大众高尔夫2P》XB资源包算法分析(4)
- Master原理剖析与源码分析:资源调度机制源码分析(schedule(),两种资源调度算法)
- Spark资源调度机制源码分析--基于spreadOutApps及非spreadOutApps两种资源调度算法
- 网络分析优化旅行商问题TSP算法资源
- Spark源码分析之Master资源调度算法原理
- 网络分析优化多旅行商(物流配送)算法资源
- DPM算法详解 DPM算法代码 实验分析 资源下载
- Android中的资源分析
- 一步步学算法(算法分析)---6(贪心算法)
- 三个博弈论算法分析
- 可达性分析算法-确定那些对象是垃圾(转)
- (转)如何分析分治型算法性能
- mahout源码canopy算法分析之三CanopyReducer
- 算法和算法分析总结
- 在Android library中不能使用switch-case语句访问资源ID的原因分析及解决方案
- 算法分析数学基础(2)
- 算法分析与设计课程——LeetCode刷题之Reverse Integer
- 机器学习(11.3)--神经网络(nn)算法的深入与优化(3) -- QuadraticCost(二次方代价函数)数理分析