您的位置:首页 > 编程语言 > Java开发

Java 读取文件字符集的示例方法

2014-02-24 09:39 656 查看
public static String getCharset(File file) {        String charset = "GBK";        byte[] first3Bytes = new byte[3];        try {            boolean checked = false;            BufferedInputStream bis = new BufferedInputStream(                  new FileInputStream(file));            bis.mark(0);            int read = bis.read(first3Bytes, 0, 3);            if (read == -1)                return charset;            if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {                charset = "UTF-16LE";                checked = true;            } else if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1]                == (byte) 0xFF) {                charset = "UTF-16BE";                checked = true;            } else if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1]                    == (byte) 0xBB                    && first3Bytes[2] == (byte) 0xBF) {                charset = "UTF-8";                checked = true;            }            bis.reset();            if (!checked) {                int loc = 0;                while ((read = bis.read()) != -1) {                    loc++;                    if (read >= 0xF0)                        break;                    //单独出现BF以下的,也算是GBK                    if (0x80 <= read && read <= 0xBF)                        break;                    if (0xC0 <= read && read <= 0xDF) {                        read = bis.read();                        if (0x80 <= read && read <= 0xBF)// 双字节 (0xC0 - 0xDF)                            // (0x80 -                            // 0xBF),也可能在GB编码内                            continue;                        else                            break;                     // 也有可能出错,但是几率较小                    } else if (0xE0 <= read && read <= 0xEF) {                        read = bis.read();                        if (0x80 <= read && read <= 0xBF) {                            read = bis.read();                            if (0x80 <= read && read <= 0xBF) {                            
    charset = "UTF-8";                                break;                            } else                                break;                        } else                            break;                    }                }                System.out.println(loc + " " + Integer.toHexString(read));            }            bis.close();        } catch (Exception e) {            e.printStackTrace();        }        return charset;    }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  java 文件 字符集