您的位置:首页 > 其它

使用libzip读取修改zip文件comment的方法(支持添加扩展字符)

2014-10-01 12:13 946 查看
2014-10-01 wcdj

摘要:本文介绍使用libzip读取和修改zip文件comment的方法,并支持添加扩展字符。前提是需要修改libzip的源码以支持添加扩展字符,否则_zip_guess_encoding会判断出错(修改代码 zip_set_archive_comment.c:65)。

0 方法

(1) 首先要安装zlib和libzip编译环境:

初始化编译libzip

./configure --prefix="/Users/gerryyang/LAMP/libzip/install/libzip-0.11.2" --with-zlib="/Users/gerryyang/LAMP/zlib/install/zlib-1.2.8"

(2) 修改libzip的源码:

去除对comment编码格式的判断,zip_set_archive_comment.c:65

/*
 * Record a new archive-level comment for za, or clear it when len is 0.
 *
 * Returns 0 on success.  Returns -1 after setting za->error to
 * ZIP_ER_RDONLY when the archive is read-only, or to ZIP_ER_INVAL when
 * len > 0 but comment is NULL.  Also returns -1 when _zip_string_new
 * fails (it is handed &za->error to report the reason).
 *
 * The CP437 rejection that normally follows string creation is compiled
 * out with "#if 0" so that comments containing arbitrary (non-UTF-8)
 * bytes are accepted.
 */
ZIP_EXTERN int
zip_set_archive_comment(struct zip *za, const char *comment, zip_uint16_t len)
{
	struct zip_string *new_comment = NULL;
	int unchanged;

	if (ZIP_IS_RDONLY(za)) {
		_zip_error_set(&za->error, ZIP_ER_RDONLY, 0);
		return -1;
	}

	if (comment == NULL && len > 0) {
		_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
		return -1;
	}

	if (len > 0) {
		new_comment = _zip_string_new((const zip_uint8_t *)comment, len, ZIP_FL_ENC_GUESS, &za->error);
		if (new_comment == NULL) {
			printf("_zip_string_new err\n");
			return -1;
		}

#if 0
		/* Disabled on purpose: this check refuses any comment that is not ASCII/UTF-8. */
		if (_zip_guess_encoding(new_comment, ZIP_ENCODING_UNKNOWN) == ZIP_ENCODING_CP437) {
			printf("_zip_guess_encoding err\n");
			_zip_string_free(new_comment);
			_zip_error_set(&za->error, ZIP_ER_INVAL, 0);
			return -1;
		}
#endif
	}

	/* Drop whatever pending change was queued before. */
	_zip_string_free(za->comment_changes);
	za->comment_changes = NULL;

	/* The new comment is a no-op if it matches the original, or both are absent. */
	if (za->comment_orig != NULL)
		unchanged = _zip_string_equal(za->comment_orig, new_comment);
	else
		unchanged = (new_comment == NULL);

	if (unchanged) {
		_zip_string_free(new_comment);
		za->comment_changed = 0;
		return 0;
	}

	za->comment_changes = new_comment;
	za->comment_changed = 1;
	return 0;
}


zip_utf-8.c:119

enum zip_encoding_type
_zip_guess_encoding(struct zip_string *str, enum zip_encoding_type expected_encoding)
{
	enum zip_encoding_type enc;
	const zip_uint8_t *name;
	zip_uint32_t i, j, ulen;

	if (str == NULL)
		return ZIP_ENCODING_ASCII;

	name = str->raw;

	if (str->encoding != ZIP_ENCODING_UNKNOWN)
		enc = str->encoding;
	else {
		enc = ZIP_ENCODING_ASCII;
		for (i=0; i<str->length; i++) {
			if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
				continue;

			enc = ZIP_ENCODING_UTF8_GUESSED;
			if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
				ulen = 1;
			else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
				ulen = 2;
			else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
				ulen = 3;
			else {
				enc = ZIP_ENCODING_CP437;
				break;
			}

			if (i + ulen >= str->length) {
				enc = ZIP_ENCODING_CP437;
				break;
			}

			for (j=1; j<=ulen; j++) {
				if ((name[i+j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
					enc = ZIP_ENCODING_CP437;
					goto done;
				}
			}
			i += ulen;
		}
	}

done:
	str->encoding = enc;

	if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
		if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
			str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;

		if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
			return ZIP_ENCODING_ERROR;
	}

	return enc;
}


1 测试代码

参考代码:

https://github.com/gerryyang/mac-utils/tree/master/tools/libzip/src

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <errno.h>
#include "zip.h"
using namespace std;

// Hex-encode `len` bytes from `src` into `dst` as uppercase digits, two
// characters per input byte, followed by a terminating NUL.
// The caller must provide room for 2 * len + 1 bytes in `dst`.
// Always returns 0.
int encode_hex_string(const unsigned char *src, int len, unsigned char *dst)
{
	static const char digits[] = "0123456789ABCDEF";
	unsigned char *out = dst;
	int idx = 0;

	while (idx < len)
	{
		unsigned char byte = src[idx++];

		*out++ = (unsigned char)digits[(byte >> 4) & 0x0f]; // high nibble
		*out++ = (unsigned char)digits[byte & 0x0f];        // low nibble
	}
	*out = '\0';

	return 0;
}

int main(int argc, char* argv[])
{

	if (argc < 3)
	{
		printf("usage: %s zipfile zipcomment\n", argv[0]);
		exit(1);
	}

	string zipfile = argv[1];
	string zipcomment = argv[2];
	int error;

	struct zip * zipfd = zip_open(zipfile.c_str(), ZIP_CHECKCONS, &error);
	if (zipfd == NULL)
	{
		switch (error)
		{
		case ZIP_ER_NOENT:
			printf("The file specified by path does not exist and ZIP_CREATE is not set [%d]\n", error);
			break;
		case ZIP_ER_EXISTS:
			printf("The file specified by path exists and ZIP_EXCL is set [%d]\n", error);
			break;
		case ZIP_ER_INVAL:
			printf("The path argument is NULL [%d]\n", error);
			break;
		case ZIP_ER_NOZIP:
			printf("The file specified by path is not a zip archive [%d]\n", error);
			break;
		case ZIP_ER_OPEN:
			printf("The file specified by path could not be opened [%d]\n", error);
			break;
		case ZIP_ER_READ:
			printf("A read error occurred; see errno for details [%d]\n", error);
			break;
		case ZIP_ER_SEEK:
			printf("The file specified by path does not allow seeks [%d]\n", error);
			break;
		default:
			printf("unknown err [%d]\n", error);
			break;
		}
		exit(1);
	}

	// get the comment for the entire zip archive
	int commentlen = 0;
	const char * comment = zip_get_archive_comment(zipfd, &commentlen, ZIP_FL_ENC_RAW);
	if (comment == NULL)
	{
		printf("zip_get_archive_comment get null or err[%d:%s]\n", errno, strerror(errno));
	}
	else 
	{
		printf("zip_get_archive_comment[%d:%s]\n", commentlen, comment);
		char copy[1024] = {0};
		memcpy(copy, comment, commentlen);
		unsigned char hex[1024] = {0};
		encode_hex_string((unsigned char *)copy, commentlen, hex);
		printf("zip_get_archive_comment hex[%d:%s]\n", commentlen, hex);
	}

	// Midas Header
	// idx:0 bytes:2 0X96FA
	// idx:2 bytes:2 comment len = strlen(channelId) + 0D0A
	// idx:4 bytes:N channelId=xxx
	// idx:4+N bytes:2 end:0X0D0A
	char dstcomment[1024] = {0};
	zip_uint16_t dstlen = 0;
	memset(dstcomment + dstlen, 0XFA, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, 0X96, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, (zipcomment.length() + 2) % 0XFF, 1);// 0D0A
	dstlen += 1;
	memset(dstcomment + dstlen, (zipcomment.length() + 2) / 0XFF, 1);
	dstlen += 1;
	memcpy(dstcomment + dstlen, zipcomment.data(), zipcomment.length());
	dstlen += zipcomment.length();
	memset(dstcomment + dstlen, 0X0D, 1);
	dstlen += 1;
	memset(dstcomment + dstlen, 0X0A, 1);
	dstlen += 1;

	unsigned char hex[1024] = {0};
	encode_hex_string((unsigned char *)dstcomment, dstlen, hex);
	printf("zip_set_archive_comment hex[%d:%s]\n", dstlen, hex);

	// sets the comment for the entire zip archive
	// If comment is NULL and len is 0, the archive comment will be removed
	// comment must be encoded in ASCII or UTF-8
	int iret = zip_set_archive_comment(zipfd, dstcomment, dstlen);// err !!!
	if (iret != 0)
	{
		printf("zip_set_archive_comment err[%d:%s]\n", iret, strerror(errno));
		switch (iret)
		{
		case ZIP_ER_INVAL:
			printf("zip_set_archive_comment: len is less than 0 or longer than the maximum comment length in a zip file (65535), or comment is not a valid UTF-8 encoded string\n");
			break;
		case ZIP_ER_MEMORY:
			printf("zip_set_archive_comment: Required memory could not be allocated\n");
			break;
		default:
			printf("zip_set_archive_comment: unknown err\n");
			break;
		}
	}

	// close, If any files within were changed, those changes are written to disk first
	iret = zip_close(zipfd);
	if (iret != 0)
	{
		printf("zip_close err[%d:%s]\n", errno, strerror(errno));
	}

	return 0;
}


2 总结

通过使用libzip可以方便地对zip的comment内容进行修改,但是限制必须使用可见的字符集;通过对libzip源码的简单修改,可以做到添加扩展的字符集。除了通过代码的方式,也可以直接使用命令行工具zip和unzip (-z add zipfile comment) 修改和读取zip的comment内容。

3 参考

[1] http://www.nih.at/libzip/zip_get_archive_comment.html
[2] http://www.nih.at/libzip/zip_set_archive_comment.html
[3] http://www.coderanch.com/t/530362/java-io/java/Zip-file-archive-comment-extended
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐