您的位置：首页 > 其它

Busybox支持中文的解决办法

2013-07-22 18:57 381 查看

在嵌入式 Linux 系统中，busybox 是最常用的根文件系统构建工具。但是从 busybox 1.17.0 版本开始，如果不修改源码，ls 命令就无法显示中文：即使内核已经配置了中文支持，在 shell 下用 ls 命令也无法显示中文，这是因为 busybox 1.17.0 及以后的版本对中文（非 ASCII 字符）的输出进行了限制。下面讲讲如何修改，使 busybox 1.17.0 及以上版本支持中文。需要修改两个文件：printable_string.c 以及 unicode.c。下面先来分析为什么 ls 命令无法显示中文。请看 printable_string.c 未修改过的代码：

[cpp] view
plaincopy

/*
 * Return a version of str that is safe to print (unmodified upstream code).
 *
 * Fast path: if every byte of str is in the range [' ', 0x7f), str itself
 * is returned and, when stats is non-NULL, all three counters are set to
 * the byte length of str.
 *
 * Slow path: a converted copy is produced. With ENABLE_UNICODE_SUPPORT it
 * comes from unicode_conv_to_printable(); otherwise it is an xstrdup() copy
 * in which every byte outside [' ', 0x7f) is replaced by '?'.
 * The copy is stored in the static ring saved[], and the entry previously
 * held in that slot is freed, so callers must not free the result.
 */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)

{

/* Ring of the most recently returned converted strings */
static char *saved[4];

/* Index of the ring slot that the next converted string will occupy */
static unsigned cur_saved; /* = 0 */

char *dst;

const char *s;

s = str;

/* Scan for the first byte that needs conversion */
while (1) {

unsigned char c = *s;

if (c == '\0') {

/* 99+% of inputs do not need conversion */

if (stats) {

/* No conversion happened: all counters equal the byte length */
stats->byte_count = (s - str);

stats->unicode_count = (s - str);

stats->unicode_width = (s - str);

}

return str;

}

/* Control characters below space force the slow path */
if (c < ' ')

break;

/*
 * DEL and every byte with the high bit set also force the slow path.
 * This is the first check the article changes: every byte of a
 * multibyte (e.g. UTF-8 Chinese) character matches it.
 */
if (c >= 0x7f)

break;

s++;

}

#if ENABLE_UNICODE_SUPPORT

dst = unicode_conv_to_printable(stats, str);

#else

{

/* Work on a private copy so the caller's string is left untouched */
char *d = dst = xstrdup(str);

while (1) {

unsigned char c = *d;

if (c == '\0')

break;

/*
 * Second check the article changes: without Unicode support every
 * byte >= 0x7f is overwritten with '?'.
 */
if (c < ' ' || c >= 0x7f)

*d = '?';

d++;

}

if (stats) {

/* d now points at the terminator, so d - dst is the byte length */
stats->byte_count = (d - dst);

stats->unicode_count = (d - dst);

stats->unicode_width = (d - dst);

}

}

#endif

/* Release whatever string previously occupied this ring slot */
free(saved[cur_saved]);

saved[cur_saved] = dst;

/* Advance with wrap-around; presumably ARRAY_SIZE(saved) is the element count (4) */
cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);

return dst;

}

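从上面代码的23和24行以及37和38行（行号指原文带行号的代码清单，即 `if (c >= 0x7f) break;` 和 `if (c < ' ' || c >= 0x7f) *d = '?';` 这两处）可以看出：大于等于0x7F的字节要么直接被break掉，要么直接被“？”代替了。而中文字符（以 UTF-8 编码为例）的每个字节都不小于0x80，所以就算是linux内核设置了支持中文，也是无法显示出来的，被“？”代替了。需要放宽这两处判断（原文中以红色加粗标出），修改后的代码如下：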

[cpp] view
plaincopy

/*
 * Return a version of str that is safe to print, letting non-ASCII bytes
 * (e.g. UTF-8 or GB2312 encoded Chinese) through unchanged.
 *
 * Fast path: if str contains no control character (no byte below ' ' and
 * no DEL, 0x7f), str itself is returned. When stats is non-NULL all three
 * counters are set to the byte length of str.
 * NOTE: these counters are byte counts; for multibyte text unicode_count
 * and unicode_width are therefore larger than the real character count.
 *
 * Slow path: a converted copy is produced. With ENABLE_UNICODE_SUPPORT it
 * comes from unicode_conv_to_printable(); otherwise it is an xstrdup() copy
 * in which every control character is replaced by '?'.
 * The copy is stored in the static ring saved[], and the entry previously
 * held in that slot is freed, so callers must not free the result.
 *
 * Difference from upstream: upstream treats every byte >= 0x7f as
 * non-printable. Here only DEL (0x7f) is; bytes >= 0x80 are kept so that
 * multibyte file names stay readable.
 */
const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
{
	/* Ring of the most recently returned converted strings */
	static char *saved[4];
	static unsigned cur_saved; /* = 0 */

	char *dst;
	const char *s;

	/* Scan for the first byte that needs conversion */
	s = str;
	while (1) {
		unsigned char c = *s;
		if (c == '\0') {
			/* 99+% of inputs do not need conversion */
			if (stats) {
				stats->byte_count = (s - str);
				stats->unicode_count = (s - str);
				stats->unicode_width = (s - str);
			}
			return str;
		}
		if (c < ' ')
			break;
		/* DEL is a control character too; bytes >= 0x80 are allowed */
		if (c == 0x7f)
			break;
		s++;
	}

#if ENABLE_UNICODE_SUPPORT
	dst = unicode_conv_to_printable(stats, str);
#else
	{
		/* Work on a private copy so the caller's string is untouched */
		char *d = dst = xstrdup(str);
		while (1) {
			unsigned char c = *d;
			if (c == '\0')
				break;
			/* Replace control characters only; keep bytes >= 0x80 */
			if (c < ' ' || c == 0x7f)
				*d = '?';
			d++;
		}
		if (stats) {
			/* d points at the terminator: d - dst is the byte length */
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
	}
#endif

	/* Release whatever string previously occupied this ring slot */
	free(saved[cur_saved]);
	saved[cur_saved] = dst;
	cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);

	return dst;
}

经过以上修改之后，如果 busybox 1.17.0 配置时没有选中 [ ] Support Unicode，那么用 ls 命令就可以看到中文了，这一点我已经亲自测试过。可是还有一种情况：busybox 1.17.0 在配置的时候选中了 [*] Support Unicode，见下：

[html] view
plaincopy

在配置里，Support Unicode 是选上的：

Busybox Settings->General Configuration->

│ │[ ] Enable locale support (system needs locale for this to work) │ │

│ │[*] Support Unicode │ │

│ │[*] Support for --long-options │ │

那么这样还需要修改一个文件，这个文件就是：unicode.c。如果不修改这个文件，ls命令也是无法显示出中文的。见下未修改的代码：

[cpp] view
plaincopy

/*
 * Build a newly allocated printable copy of src, limited to `width`
 * (unmodified upstream code).
 *
 * stats, if non-NULL, receives the byte count, character count and
 * display width of the result. If flags contains UNI_FLAG_PAD, the result
 * is padded with spaces up to `width`.
 *
 * When unicode_status is not UNICODE_ON the string is handled byte by
 * byte and every byte outside [' ', 0x7f) becomes '?'. Otherwise src is
 * decoded one wide character at a time, and characters that are invalid,
 * unsupported or non-printable are replaced by CONFIG_SUBST_WCHAR.
 */
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)

{

char *dst;

unsigned dst_len;

unsigned uni_count;

unsigned uni_width;

/* Byte-wise path: Unicode handling is not active at run time */
if (unicode_status != UNICODE_ON) {

char *d;

if (flags & UNI_FLAG_PAD) {

/* Result is exactly `width` bytes plus the terminator */
d = dst = xmalloc(width + 1);

while ((int)--width >= 0) {

unsigned char c = *src;

if (c == '\0') {

/* Source exhausted: fill the rest of the field with spaces */
do

*d++ = ' ';

while ((int)--width >= 0);

break;

}

/*
 * First line the article changes: any byte >= 0x7f, including
 * every byte of a multibyte character, is replaced by '?'.
 */
*d++ = (c >= ' ' && c < 0x7f) ? c : '?';

src++;

}

*d = '\0';

} else {

/* Copy at most `width` bytes, then sanitize the copy in place */
d = dst = xstrndup(src, width);

while (*d) {

unsigned char c = *d;

/* Second line the article changes (same 0x7f upper bound) */
if (c < ' ' || c >= 0x7f)

*d = '?';

d++;

}

}

if (stats) {

/* Byte-wise path: all three counters are the byte length */
stats->byte_count = (d - dst);

stats->unicode_count = (d - dst);

stats->unicode_width = (d - dst);

}

return dst;

}

/* Unicode path: the output buffer is grown as characters are appended */
dst = NULL;

uni_count = uni_width = 0;

dst_len = 0;

while (1) {

int w;

wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE

{

mbstate_t mbst = { 0 };

ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);

/* If invalid sequence is seen: -1 is returned,

* src points to the invalid sequence, errno = EILSEQ.

* Else number of wchars (excluding terminating L'\0')

* written to dest is returned.

* If len (here: 1) non-L'\0' wchars stored at dest,

* src points to the next char to be converted.

* If string is completely converted: src = NULL.

*/

if (rc == 0) /* end-of-string */

break;

if (rc < 0) { /* error */

/* Skip one byte of the invalid sequence and emit a substitute */
src++;

goto subst;

}

if (!iswprint(wc))

goto subst;

}

#else

src = mbstowc_internal(&wc, src);

/* src is advanced to next mb char

* wc == ERROR_WCHAR: invalid sequence is seen

* else: wc is set

*/

if (wc == ERROR_WCHAR) /* error */

goto subst;

if (wc == 0) /* end-of-string */

break;

#endif

/* Characters above the configured limit (if one is set) are substituted */
if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)

goto subst;

w = wcwidth(wc);

if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */

|| (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)

|| (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)

) {

/* Also the jump target for the error cases above */
subst:

wc = CONFIG_SUBST_WCHAR;

w = 1;

}

width -= w;

/* Note: if width == 0, we still may add more chars,

* they may be zero-width or combining ones */

if ((int)width < 0) {

/* can't add this wc, string would become longer than width */

width += w;

break;

}

uni_count++;

uni_width += w;

/* Make room for one more multibyte character */
dst = xrealloc(dst, dst_len + MB_CUR_MAX);

#if ENABLE_UNICODE_USING_LOCALE

{

mbstate_t mbst = { 0 };

dst_len += wcrtomb(&dst[dst_len], wc, &mbst);

}

#else

dst_len += wcrtomb_internal(&dst[dst_len], wc);

#endif

}

/* Pad to remaining width */

if (flags & UNI_FLAG_PAD) {

dst = xrealloc(dst, dst_len + width + 1);

/* Each padding space counts as one character of width one */
uni_count += width;

uni_width += width;

while ((int)--width >= 0) {

dst[dst_len++] = ' ';

}

}

/*
 * NOTE(review): if the loop appended nothing (e.g. src is "" or width
 * is 0) and UNI_FLAG_PAD is not set, dst is still NULL here.
 */
dst[dst_len] = '\0';

if (stats) {

stats->byte_count = dst_len;

stats->unicode_count = uni_count;

stats->unicode_width = uni_width;

}

return dst;

}

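见上面20行和28行（行号指原文带行号的代码清单，即 `*d++ = (c >= ' ' && c < 0x7f) ? c : '?';` 和 `if (c < ' ' || c >= 0x7f)` 这两处），需要修改一下，放宽其中对 0x7f 的上限判断，修改后的代码见下：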

[cpp] view
plaincopy

/*
 * Build a newly allocated printable copy of src, limited to `width`,
 * letting non-ASCII bytes through when Unicode handling is not active.
 *
 * stats, if non-NULL, receives the byte count, character count and
 * display width of the result. If flags contains UNI_FLAG_PAD, the result
 * is padded with spaces up to `width`.
 * The caller owns the returned buffer.
 *
 * When unicode_status is not UNICODE_ON the string is handled byte by
 * byte: control characters (bytes below ' ' and DEL, 0x7f) become '?',
 * while bytes >= 0x80 are copied unchanged so that multibyte text such as
 * Chinese file names stays readable. In this path all three stats
 * counters are byte counts.
 *
 * When unicode_status is UNICODE_ON, src is decoded one wide character at
 * a time, and characters that are invalid, unsupported or non-printable
 * are replaced by CONFIG_SUBST_WCHAR.
 *
 * Difference from upstream: upstream replaces every byte >= 0x7f in the
 * byte-wise path. Here only DEL is replaced.
 */
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
{
	char *dst;
	unsigned dst_len;
	unsigned uni_count;
	unsigned uni_width;

	/* Byte-wise path: Unicode handling is not active at run time */
	if (unicode_status != UNICODE_ON) {
		char *d;
		if (flags & UNI_FLAG_PAD) {
			/* Result is exactly `width` bytes plus the terminator */
			d = dst = xmalloc(width + 1);
			while ((int)--width >= 0) {
				unsigned char c = *src;
				if (c == '\0') {
					/* Source exhausted: fill the field with spaces */
					do
						*d++ = ' ';
					while ((int)--width >= 0);
					break;
				}
				/* Keep bytes >= 0x80; DEL is still a control char */
				*d++ = (c >= ' ' && c != 0x7f) ? c : '?';
				src++;
			}
			*d = '\0';
		} else {
			/* Copy at most `width` bytes, then sanitize in place */
			d = dst = xstrndup(src, width);
			while (*d) {
				unsigned char c = *d;
				if (c < ' ' || c == 0x7f)
					*d = '?';
				d++;
			}
		}
		if (stats) {
			stats->byte_count = (d - dst);
			stats->unicode_count = (d - dst);
			stats->unicode_width = (d - dst);
		}
		return dst;
	}

	/* Unicode path: the output buffer is grown as characters are appended */
	dst = NULL;
	uni_count = uni_width = 0;
	dst_len = 0;
	while (1) {
		int w;
		wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
			/* If invalid sequence is seen: -1 is returned,
			 * src points to the invalid sequence, errno = EILSEQ.
			 * Else number of wchars (excluding terminating L'\0')
			 * written to dest is returned.
			 * If len (here: 1) non-L'\0' wchars stored at dest,
			 * src points to the next char to be converted.
			 * If string is completely converted: src = NULL.
			 */
			if (rc == 0) /* end-of-string */
				break;
			if (rc < 0) { /* error */
				/* Skip one byte of the bad sequence, emit a substitute */
				src++;
				goto subst;
			}
			if (!iswprint(wc))
				goto subst;
		}
#else
		src = mbstowc_internal(&wc, src);
		/* src is advanced to next mb char
		 * wc == ERROR_WCHAR: invalid sequence is seen
		 * else: wc is set
		 */
		if (wc == ERROR_WCHAR) /* error */
			goto subst;
		if (wc == 0) /* end-of-string */
			break;
#endif
		/* Characters above the configured limit (if set) are substituted */
		if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
			goto subst;
		w = wcwidth(wc);
		if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
		 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
		 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
		) {
 subst:
			wc = CONFIG_SUBST_WCHAR;
			w = 1;
		}
		width -= w;
		/* Note: if width == 0, we still may add more chars,
		 * they may be zero-width or combining ones */
		if ((int)width < 0) {
			/* can't add this wc, string would become longer than width */
			width += w;
			break;
		}

		uni_count++;
		uni_width += w;
		/* Make room for one more multibyte character */
		dst = xrealloc(dst, dst_len + MB_CUR_MAX);
#if ENABLE_UNICODE_USING_LOCALE
		{
			mbstate_t mbst = { 0 };
			dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
		}
#else
		dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
	}

	/* Pad to remaining width */
	if (flags & UNI_FLAG_PAD) {
		dst = xrealloc(dst, dst_len + width + 1);
		/* Each padding space counts as one character of width one */
		uni_count += width;
		uni_width += width;
		while ((int)--width >= 0) {
			dst[dst_len++] = ' ';
		}
	}

	/*
	 * Nothing was appended (e.g. src is "" or width is 0) and no padding
	 * was requested: dst is still NULL, so allocate room for the
	 * terminator instead of dereferencing NULL below.
	 */
	if (!dst)
		dst = xmalloc(1);
	dst[dst_len] = '\0';
	if (stats) {
		stats->byte_count = dst_len;
		stats->unicode_count = uni_count;
		stats->unicode_width = uni_width;
	}
	return dst;
}

经过以上修改之后，就算配置支持Unicode，ls命令也是可以支持中文的。同时也可以进入中文名称的目录（文件夹）。

转载自：wavemcu的CSDN博客

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航