您的位置:首页 > 移动开发 > IOS开发

iOS 实现 TXT 文本自动识别编码的方法。

2013-12-23 14:34 555 查看
TXT 文本的编码识别是个复杂的问题,幸好有一个 C/C++ 库可以完成这件事。
这个库叫 uchardet,可以自行到 GitHub 下载。不过它没有详细的使用手册,所以我稍微研究了一下它的用法。
把 uchardet 文件夹下的 .h、.cpp 和 .tab 文件全部复制到自己的工程里,然后在需要自动识别编码的源文件中加入:
#include "uchardet.h"

实现代码如下。
//
//  EBOOKReadViewController.m
//  EBOOk
//
//  Created by 赵锋 on 13-12-18.
//  Copyright (c) 2013年 赵锋. All rights reserved.
//
#import "EBOOKReadViewController.h"

#include "uchardet.h"

// Number of bytes read from the start of the file and fed to the detector.
#define NUMBER_OF_SAMPLES (2048)

// Maximum number of UTF-16 units of the file shown in the text view.
static const NSUInteger kPreviewLength = 1000;

// Charset names this controller maps explicitly, matched case-insensitively.
static const struct {
    const char *name;
    CFStringEncoding encoding;
} kKnownCharsets[] = {
    {"gb18030",      kCFStringEncodingGB_18030_2000},
    {"Big5",         kCFStringEncodingBig5},
    {"UTF-8",        kCFStringEncodingUTF8},
    {"Shift_JIS",    kCFStringEncodingShiftJIS},
    {"windows-1252", kCFStringEncodingWindowsLatin1},
    {"x-euc-tw",     kCFStringEncodingEUC_TW},
    {"EUC-KR",       kCFStringEncodingEUC_KR},
    {"EUC-JP",       kCFStringEncodingEUC_JP},
};

/// Maps a detector charset name to a CoreFoundation string encoding.
///
/// The explicit table is consulted first; any other name is handed to
/// CFStringConvertIANACharSetNameToEncoding so that charsets outside the table
/// can still be resolved.
///
/// @param charsetName Charset name reported by the detector; may be nil or empty.
/// @return The matching encoding, or kCFStringEncodingInvalidId when the name is
///         nil, empty, or unknown.
static CFStringEncoding EBOOKEncodingForCharsetName(NSString *charsetName) {
    if ([charsetName length] == 0) {
        return kCFStringEncodingInvalidId;
    }

    const size_t count = sizeof(kKnownCharsets) / sizeof(kKnownCharsets[0]);
    for (size_t i = 0; i < count; i++) {
        NSString *candidate = [NSString stringWithUTF8String:kKnownCharsets[i].name];
        if ([charsetName caseInsensitiveCompare:candidate] == NSOrderedSame) {
            return kKnownCharsets[i].encoding;
        }
    }

    return CFStringConvertIANACharSetNameToEncoding((__bridge CFStringRef)charsetName);
}

// NOTE(review): this file is written assuming ARC — confirm the target's settings.
@interface EBOOKReadViewController ()

// Charset name produced by the most recent successful -haveTextBianMa: call.
// Stored as a copied NSString because the C string handed out by uchardet is
// released together with the detector handle.
@property (nonatomic, copy) NSString *detectedCharsetName;

@end

@implementation EBOOKReadViewController

- (instancetype)initWithNibName:(NSString *)nibNameOrNil bundle:(NSBundle *)nibBundleOrNil {
    self = [super initWithNibName:nibNameOrNil bundle:nibBundleOrNil];
    if (self) {
        // Custom initialization
    }
    return self;
}

/// Loads the bundled text file, detects its encoding, decodes it, and shows the
/// first kPreviewLength characters in the text view.
- (void)viewDidLoad {
    [super viewDidLoad];

    NSString *path = [[NSBundle mainBundle] pathForResource:@"五界至尊" ofType:@"txt"];
    if (path == nil) {
        // Without this guard a NULL path would be passed on to fopen().
        NSLog(@"Text resource not found in the main bundle.");
        return;
    }

    CFStringEncoding cfEncode = kCFStringEncodingInvalidId;
    if ([self haveTextBianMa:[path fileSystemRepresentation]] == 0) {
        cfEncode = EBOOKEncodingForCharsetName(self.detectedCharsetName);
    }
    if (cfEncode == kCFStringEncodingInvalidId) {
        // Detection failed or named an unknown charset: choose UTF-8 explicitly
        // instead of silently decoding with encoding 0 (MacRoman).
        cfEncode = kCFStringEncodingUTF8;
    }

    NSError *err = nil;
    NSString *str = [NSString stringWithContentsOfFile:path
                                              encoding:CFStringConvertEncodingToNSStringEncoding(cfEncode)
                                                 error:&err];
    if (str == nil) {
        NSLog(@"Failed to read %@: %@", path, err);
        return;
    }

    // Clamp the preview so short files do not raise NSRangeException, and widen
    // the cut to a composed-character boundary so no character is split.
    NSString *preview = str;
    if ([str length] > kPreviewLength) {
        NSRange previewRange =
            [str rangeOfComposedCharacterSequencesForRange:NSMakeRange(0, kPreviewLength)];
        preview = [str substringWithRange:previewRange];
    }
    self.txtView.text = preview;
}

/// Detects the character encoding of a text file with uchardet.
///
/// Reads up to NUMBER_OF_SAMPLES bytes from the start of the file, feeds them to
/// the detector, and stores the reported charset name in detectedCharsetName.
///
/// @param strTxtPath File-system path of the file to analyse.
/// @return 0 on success, 1 if the file could not be opened, -1 if the detector
///         rejected the data.
- (int)haveTextBianMa:(const char *)strTxtPath {
    // Drop any result from a previous call so a failure never leaves stale data.
    self.detectedCharsetName = nil;

    if (strTxtPath == NULL) {
        printf("文件打开失败!\n");
        return 1;
    }

    // Open the file and read a bounded sample of raw bytes.
    FILE *file = fopen(strTxtPath, "rb");
    if (file == NULL) {
        printf("文件打开失败!\n");
        return 1;
    }

    char buf[NUMBER_OF_SAMPLES];
    size_t len = fread(buf, sizeof(char), NUMBER_OF_SAMPLES, file);
    fclose(file);

    uchardet_t ud = uchardet_new();
    if (uchardet_handle_data(ud, buf, len) != 0) {
        printf("分析编码失败!\n");
        uchardet_delete(ud);  // release the handle on the failure path as well
        return -1;
    }
    uchardet_data_end(ud);

    const char *charset = uchardet_get_charset(ud);
    printf("文本的编码方式是%s。\n", charset != NULL ? charset : "");

    // Copy the name before deleting the detector; keeping the raw pointer past
    // uchardet_delete() would leave it dangling.
    if (charset != NULL) {
        self.detectedCharsetName = [NSString stringWithUTF8String:charset];
    }
    uchardet_delete(ud);

    return 0;
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

@end

本文出自 “做fashion的IT人” 博客,请务必保留此出处http://kyoworkios.blog.51cto.com/878347/1344013
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: