您的位置：首页 > 其它

PSP《大众高尔夫2P》XB资源包算法分析（3）

2010-08-01 11:11 134 查看

反编译过程

这时需要完成的工作是用C语言将汇编代码的行为进行模拟。一本MIPS指令集手册是必须的，可以通过下面的URL获得：
http://dkrizanc.web.wesleyan.edu/courses/231/07/MIPS_Vol2.pdf
手册名为：《MIPS32™ Architecture For Programmers Volume II: The MIPS32™ Instruction Set》
当然也有工具可以帮助我们完成这项工作：
rec，这是一个闭源的工具。
http://www.backerstreet.com/rec/rec.htm
pspdecompiler可以用在prx文件上，我试了下对eboot好像无法使用。其源代码可以通过下面URL获取：http://repo.or.cz/w/pspdecompiler.git
但我们还是建议大家手动完成这项工作，这样对理解算法也会有帮助。这里还建议大家使用IDA的图形视图（Graph view）来阅读汇编代码。
编写模拟代码

下面是我列出来的模拟代码，供大家参考：

typedef unsigned int   u32;
typedef unsigned char  u8;

/*
 * Stage 1: register-level C simulation of the decoder routine.
 *
 * The goto labels and the register-named locals (t0, t1, t2, a2, a3, v1)
 * are kept on purpose so that this listing can be compared side by side
 * with the disassembly. Registers that hold addresses (t2, v1) are declared
 * as pointers instead of u32 so the code also works on hosts whose pointers
 * are wider than 32 bits.
 *
 * des : output buffer, must have room for len bytes
 * src : compressed stream
 * len : number of bytes to produce; nothing is done when len <= 0
 *
 * Stream layout, selected by the low bits of each token byte:
 *   (token & 3) == 0 : literal run of (token >> 2) + 1 bytes follows
 *   (token & 1) == 1 : 16-bit codeword, length = ((v >> 1) & 7) + 3,
 *                      distance = v >> 4
 *   otherwise        : 24-bit codeword, length = ((v >> 2) & 0x3ff) + 3,
 *                      distance = v >> 12
 * The stream is trusted: neither src nor the back-reference distance is
 * bounds-checked, and the last token may write past des + len.
 */
void lzss_decoder(u8* des, u8* src, int len)
{
    u32 t0, t1;     /* t0: constant 1; t1: token byte, then copy counter */
    u32 a2, a3;     /* scratch registers */
    u8 *t2;         /* source address of a back-reference copy */
    u8 *v1;         /* one past the last output byte */

    /* Reject empty requests too: the loop below is "do ... while" shaped
       and would otherwise decode one token into a zero-length buffer. */
    if(len <= 0)
    {
        return;
    }
    v1 = des + len;

    t0 = 1;
    t1 = *src;

loc_882ADEC:
    a2 = t1 & 3;
    src++;
    if(a2 != 0)
    {
        a2 = t1 & 1;
        if(a2 != t0)
        {
            /* 24-bit codeword: token byte plus two more bytes */
            a3 = *src;
            a2 = *(src+1);
            a3 = a3 << 8;
            a2 = a2 << 16;
            a2 = a3 + a2;
            a3 = t1 + a2;
            a2 = (a3 >> 2) & 0x3ff;   /* right shift: length is bits 2..11 */
            t1 = a2 + 3;
            a2 = a3 >> 12;
            t2 = des - a2;
            src += 2;
        }
        else
        {
            /* 16-bit codeword: token byte plus one more byte */
            a2 = *src;
            a2 = a2 << 8;
            a3 = t1 + a2;
            a2 = (a3 >> 1) & 0x7;
            t1 = a2 + 3;
            a2 = a3 >> 4;
            t2 = des - a2;
            src++;
        }
        a2 = t1;
        t1--;
        if(a2 > 0)
        {
loc_882AE90:
            /* byte-by-byte so that overlapping matches repeat data */
            a3 = *t2;
            a2 = t1;
            t1--;
            *des = (u8)a3;
            t2++;
            des++;
            if(a2 > 0)
            {
                goto loc_882AE90;
            }
        }
loc_882AEAC:
        a2 = (des < v1) ? 1 : 0;
    }
    else
    {
        /* literal run */
        a2 = t1 >> 2;
        a2++;
        t1 = a2 - 1;
        if(a2 > 0)
        {
loc_882AE08:
            a3 = *src;
            a2 = t1;
            t1--;
            *des = (u8)a3;
            des++;
            src++;
            if(a2 > 0)
            {
                goto loc_882AE08;
            }
            a2 = (des < v1) ? 1 : 0;
        }
        else
        {
            goto loc_882AEAC;
        }
    }
    if(a2 != 0)
    {
        t1 = *src;
        goto loc_882ADEC;
    }
    else
    {
        return;
    }
}

.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }

恢复高级语言特征

下面的工作是恢复高级语言特征。

1. 压缩赋值语句的行数

2. 根据循环条件，将goto语句还原为while或for

typedef unsigned int   u32;
typedef unsigned char  u8;

/*
 * Stage 2: the register simulation with assignments folded together and
 * the inner copy loops rewritten as for-loops. The outer loop still uses
 * the two goto labels; it is turned into a while-loop in the final stage.
 *
 * des : output buffer, must have room for len bytes
 * src : compressed stream
 * len : number of bytes to produce; nothing is done when len <= 0
 *
 * The stream is trusted: no bounds checks are made on src or on the
 * back-reference distance.
 */
void lzss_decoder(u8* des, u8* src, int len)
{
    u32 a2, a3;     /* scratch values carried over from the simulation */
    u32 type;       /* token byte, later reused as the match length; kept
                       32 bits wide because a match can be up to 1026 bytes */
    u32 i;
    u8 *t2;         /* source address of a back-reference copy */
    u8 *end;        /* one past the last output byte */

    if(len <= 0)
    {
        return;
    }
    end = des + len;
    type = *src;

loc_882ADEC:
    src++;                      /* step past the token byte just read */
    if(0 == (type & 3))
    {
        /* literal run of (type >> 2) + 1 bytes */
        a2 = type >> 2;
        a2++;
        for(i=0; i<a2; i++)
        {
            *des = *src;
            des++;
            src++;
        }
        goto loc_882AEAC;
    }
    else if(1 != (type & 1))
    {
        /* 24-bit codeword; the parentheses matter because '+' binds
           tighter than '<<' */
        a2 = (*src<<8) + (*(src+1)<<16);
        a3 = type + a2;
        a2 = (a3>>2) & 0x3ff;
        type = a2 + 3;
        t2 = des - (a3>>12);
        src += 2;
    }
    else
    {
        /* 16-bit codeword */
        a2 = (*src << 8);
        a3 = a2 + type;
        a2 = (a3 >> 1) & 0x7;
        type = a2 + 3;
        a2 = a3 >> 4;
        t2 = des - a2;
        src++;
    }
    /* byte-by-byte so that overlapping matches repeat data */
    for(i=0; i<type; i++)
    {
        *des = *t2;
        t2++;
        des++;
    }

loc_882AEAC:
    if(des < end)
    {
        type = *src;
        goto loc_882ADEC;
    }
    return;
}

.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }

增加可读性

下面要做的工作是看懂算法，为变量命名和调整代码结构，使其具有可读性。

最终的代码：

typedef unsigned int   u32;
typedef unsigned char  u8;

/*
 * Decode an LZSS-variant stream into dst.
 *
 * dst     : output buffer, must have room for dst_len bytes
 * src     : compressed stream
 * dst_len : number of decoded bytes to produce; nothing is done when
 *           dst_len <= 0
 *
 * Each token starts with one byte whose low bits select its kind:
 *   (type & 3) == 0 : literal run, (type >> 2) + 1 raw bytes follow
 *   (type & 1) == 1 : 16-bit little-endian codeword,
 *                     length = ((v >> 1) & 0x7) + 3, distance = v >> 4
 *   otherwise       : 24-bit little-endian codeword,
 *                     length = ((v >> 2) & 0x3ff) + 3, distance = v >> 12
 *
 * The stream is trusted: src and the back-reference distance are not
 * bounds-checked, and the last token may write past dst + dst_len.
 */
void lzss_decoder(u8* dst, u8* src, int dst_len)
{
    u8 *end, *offset, type;
    u32 len;
    u32 i;

    if(dst_len <= 0)
    {
        return;
    }
    end = dst + dst_len;

    while(dst < end)
    {
        type = *src++;
        if(0 != (type & 0x3)) /* handle compressed data */
        {
            if(1 != (type & 0x1))
            {
                len = (u32)type | ((u32)src[0] << 8) | ((u32)src[1] << 16);
                offset = dst - (len >> 12);
                /* right shift: the length field is bits 2..11
                   (an earlier draft used << 2 here by mistake) */
                len = ((len >> 2) & 0x3ff) + 3;
                src += 2;
            }
            else
            {
                len = (u32)type | ((u32)src[0] << 8);
                offset = dst - (len >> 4);
                len = ((len >> 1) & 0x7) + 3;
                src++;
            }
            /* byte-by-byte so that a match overlapping dst repeats data */
            for(i = 0; i < len; i++)
            {
                *dst++ = *offset++;
            }
        }
        else /* handle uncompressed data */
        {
            len = (u32)(type >> 2) + 1;
            for(i = 0; i < len; i++)
            {
                *dst++ = *src++;
            }
        }
    }
    return;
}

.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }

算法功能验证

我们还需要验证代码的正确性。这时上面dump的内存文件leaveLzssDecoder.bin就有用了。

/* codecVerify.c
 *
 * Checks the C re-implementation of lzss_decoder() against a PSP memory
 * dump: the compressed stream found in the dump is decoded again on the
 * host and compared with the decoded data stored in the same dump.
 */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int   u32;
typedef unsigned short u16;
typedef unsigned char  u8;

int load_psp_memfile(char *path, u8 **buf);
void mem_check(u8* dst, u8* src, int len);
void lzss_decoder(u8* dst, u8* src, int dst_len);

/* Translate PSP address b into a host pointer inside dump buffer a.
   Assumes the dump file starts at PSP address 0x08800000. */
#define ADDRMAP(a,b) ((a) + ((b) - 0x08800000))

#define DUMP_FILE     "leaveLzssDecoder.bin"
#define REF_ADDR      0x08B39798   /* a0 of the traced call: decoded output */
#define SRC_ADDR      0x08BE4800   /* a1 of the traced call: compressed input */
#define DECODED_SIZE  0x0000010E   /* decoded length used for the check */

/*
 * Load the dump, decode the compressed stream and compare the result with
 * the reference bytes in the dump. Returns 0 when the run completed (the
 * comparison verdict is printed by mem_check), non-zero on setup failure.
 */
int main(int argc, char * argv[])
{
    int ret = 0;
    u8 *pspmem = NULL;
    u8 *dst = NULL;

    (void)argc;
    (void)argv;

    ret = load_psp_memfile(DUMP_FILE, &pspmem);
    if(0 != ret)
    {
        printf("Open file [%s] failed\n", DUMP_FILE);
        return ret;
    }
    /* NOTE(review): the dump size is not checked against SRC_ADDR/REF_ADDR;
       a truncated dump would be read out of bounds. */
    dst = calloc(DECODED_SIZE, 1);
    if(NULL == dst)
    {
        printf("Out of memory\n");
        free(pspmem);
        return -1;
    }
    lzss_decoder(dst, ADDRMAP(pspmem, SRC_ADDR), DECODED_SIZE);
    mem_check(dst, ADDRMAP(pspmem, REF_ADDR), DECODED_SIZE);
    free(dst);
    free(pspmem);
    return ret;
}

/*
 * Read the whole file at path into a freshly allocated buffer.
 *
 * On success stores the buffer in *buf (the caller frees it) and returns 0.
 * On any failure (open, seek, empty file, allocation, short read) returns
 * -1 and leaves *buf set to NULL.
 */
int load_psp_memfile(char *path, u8 **buf)
{
    FILE *fp = NULL;
    long size;
    u8 *data = NULL;

    *buf = NULL;

    fp = fopen(path, "rb");
    if(NULL == fp)
        return -1;

    if(0 != fseek(fp, 0, SEEK_END))
        goto fail;
    size = ftell(fp);
    if(size <= 0)
        goto fail;
    if(0 != fseek(fp, 0, SEEK_SET))
        goto fail;

    data = malloc((size_t)size);
    if(NULL == data)
        goto fail;
    if(1 != fread(data, (size_t)size, 1, fp))
        goto fail;

    fclose(fp);
    *buf = data;
    return 0;

fail:
    free(data);
    fclose(fp);
    return -1;
}

/*
 * Compare len bytes of dst and src and print the verdict; on the first
 * mismatch print its offset and both byte values, then stop.
 */
void mem_check(u8* dst, u8* src, int len)
{
    int i;

    for(i = 0; i < len; i++)
    {
        if(*(dst+i) != *(src+i))
        {
            printf("memory check failed\n offset[0x%x] dst[0x%x]!=src[0x%x]\n",
                   (unsigned)i, (unsigned)*(dst+i), (unsigned)*(src+i));
            return;
        }
    }
    printf("memory check pass!\n");
}

在这个阶段我发现我将

len = ((len>>2) & 0x3ff) + 3;

这句话误写成了

len = ((len<<2) & 0x3ff) + 3;

从汇编到C模拟，直到最终的验证，其中每个阶段都可能人为地引入错误，所以一旦发现就应及时更正，以免将错误放大到后面的阶段。

算法总结

Lzss算法是一种匹配串的替换算法。其变种主要发生在下面几点：

1. 搜索窗的尺寸

2. 匹配串长度的下限和上限

3. 码字和未压缩数据的区分方式

4. 码字中偏移地址和字串长度的组织方式

WIKI指定的LZSS算法参考代码为Allegro

http://alleg.svn.sourceforge.net/viewvc/alleg/allegro/branches/4.2/src/lzss.c?revision=7522&view=markup

该变种算法中根据偏移地址的范围做了2种不同的编码方式。通过码字最低的2个bits来区分是否经过压缩以及编码的方式。仔细阅读代码可以获得下面的编码参数。

#define N                4095           /* largest back-reference distance: both codeword forms carry a 12-bit offset field */

#define F1                10             /* longest match a 16-bit codeword can express: 3-bit length field (max 7) + 3 */

#define F2             1026             /* longest match a 24-bit codeword can express: 10-bit length field (max 0x3ff) + 3 */

#define THRESHOLD        2              /* shortest match a codeword can express is THRESHOLD + 1 = 3 bytes (length field 0) */

.csharpcode, .csharpcode pre
{
font-size: small;
color: black;
font-family: consolas, "Courier New", courier, monospace;
background-color: #ffffff;
/*white-space: pre;*/
}
.csharpcode pre { margin: 0em; }
.csharpcode .rem { color: #008000; }
.csharpcode .kwrd { color: #0000ff; }
.csharpcode .str { color: #006080; }
.csharpcode .op { color: #0000c0; }
.csharpcode .preproc { color: #cc6633; }
.csharpcode .asp { background-color: #ffff00; }
.csharpcode .html { color: #800000; }
.csharpcode .attr { color: #ff0000; }
.csharpcode .alt
{
background-color: #f4f4f4;
width: 100%;
margin: 0em;
}
.csharpcode .lnum { color: #606060; }

有了上面的参数实现encoder也就比较简单了。这里需要指出的是，不同的encoder实现最终产生的压缩信息会不一样，我们不要求和原版一致，但求压缩率以及代码的执行效率。

算法0x20（lzss）

因为loading.xb中只包含0x20这一种算法，所以我们在第一个文件压缩内容的偏移地址处设置读断点：0x26*4 = 0x98

0x08BE47C0 + 0x98 = 0x08BE4858

bpset 0x08BE4858 r

EPC - 0x0882AEDC

跳过8个字节的长度信息，来到压缩数据部分：

bpset 0x08BE4860 r

EPC - 0x0882ADE8

前一个断点因为在读取长度，经分析是算法函数的调用者，后一个是lzss_decoder函数的领空。

经过试验验证，0x20这种lzss和文件名列表部分的压缩采用同样的算法。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航