您的位置:首页 > 编程语言 > C#

C#判断文本文件编码是ANSI还是无BOM的UTF8

2016-10-11 23:19 309 查看
/// <summary>
/// Guesses whether a raw text buffer is UTF-8 or a legacy "ANSI" code-page encoding.
/// </summary>
/// <remarks>
/// A leading UTF-8 byte order mark (EF BB BF) is taken as proof of UTF-8 and the
/// remaining bytes are not inspected. Without a BOM, the buffer is reported as UTF-8
/// only when every byte sequence is well-formed per RFC 3629: no 5/6-byte forms, no
/// overlong encodings, no UTF-16 surrogates, nothing above U+10FFFF, and no truncated
/// trailing sequence. Empty and pure-ASCII buffers therefore report UTF-8.
/// NOTE: <see cref="Encoding.Default"/> is the system ANSI code page only on
/// .NET Framework; on .NET Core / .NET 5+ it is always UTF-8.
/// </remarks>
/// <param name="bs">The raw bytes of the text to classify.</param>
/// <returns>
/// <see cref="Encoding.UTF8"/> when the bytes start with a UTF-8 BOM or are entirely
/// well-formed UTF-8; otherwise <see cref="Encoding.Default"/>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="bs"/> is null.</exception>
Encoding GetBytesEncoding(byte[] bs)
{
    if (bs == null)
    {
        throw new System.ArgumentNullException(nameof(bs));
    }

    int len = bs.Length;
    if (len >= 3 && bs[0] == 0xEF && bs[1] == 0xBB && bs[2] == 0xBF)
    {
        return Encoding.UTF8;
    }

    int i = 0;
    while (i < len)
    {
        byte lead = bs[i++];
        if (lead < 0x80)
        {
            // Single-byte ASCII.
            continue;
        }

        // Number of continuation bytes the lead byte announces, plus the allowed range
        // of the FIRST continuation byte. That range is narrowed for the lead bytes
        // where the full 80..BF range would admit an ill-formed sequence.
        int trailing;
        int firstMin = 0x80;
        int firstMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            trailing = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            trailing = 2;
            if (lead == 0xE0)
            {
                firstMin = 0xA0; // E0 80..9F would be an overlong 3-byte form
            }
            else if (lead == 0xED)
            {
                firstMax = 0x9F; // ED A0..BF would encode a UTF-16 surrogate
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            trailing = 3;
            if (lead == 0xF0)
            {
                firstMin = 0x90; // F0 80..8F would be an overlong 4-byte form
            }
            else if (lead == 0xF4)
            {
                firstMax = 0x8F; // F4 90..BF would exceed U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (80..BF), overlong lead (C0, C1),
            // or a lead that can never appear in UTF-8 (F5..FF).
            return Encoding.Default;
        }

        if (len - i < trailing)
        {
            // The sequence is cut off by the end of the buffer.
            return Encoding.Default;
        }

        if (bs[i] < firstMin || bs[i] > firstMax)
        {
            return Encoding.Default;
        }

        for (int k = 1; k < trailing; k++)
        {
            // Remaining continuation bytes must look like 10xxxxxx.
            if ((bs[i + k] & 0xC0) != 0x80)
            {
                return Encoding.Default;
            }
        }

        i += trailing;
    }

    return Encoding.UTF8;
}

/// <summary>
/// Reads a whole text file and decodes it as UTF-8 or as the legacy "ANSI" encoding,
/// whichever its bytes indicate.
/// </summary>
/// <remarks>
/// A file starting with the UTF-8 byte order mark (EF BB BF) is decoded as UTF-8 with
/// the mark stripped. Without a BOM, the file is decoded as UTF-8 only when every byte
/// sequence is well-formed per RFC 3629; otherwise it is decoded with
/// <see cref="Encoding.Default"/>. Empty and pure-ASCII files take the UTF-8 path.
/// NOTE: <see cref="Encoding.Default"/> is the system ANSI code page only on
/// .NET Framework; on .NET Core / .NET 5+ it is always UTF-8.
/// </remarks>
/// <param name="filename">Path of the file to read.</param>
/// <returns>The decoded file content.</returns>
string ReadAllFormatText(string filename)
{
    byte[] bs = File.ReadAllBytes(filename);
    int len = bs.Length;
    if (len >= 3 && bs[0] == 0xEF && bs[1] == 0xBB && bs[2] == 0xBF)
    {
        // Skip the three BOM bytes so they do not surface as U+FEFF in the text.
        return Encoding.UTF8.GetString(bs, 3, len - 3);
    }

    Encoding detected = IsWellFormedUtf8(bs) ? Encoding.UTF8 : Encoding.Default;
    return detected.GetString(bs);
}

/// <summary>
/// Returns true when <paramref name="bytes"/> consists solely of well-formed UTF-8
/// sequences per RFC 3629 (no 5/6-byte forms, overlong encodings, surrogates,
/// values above U+10FFFF, or truncated sequences). An empty buffer is well-formed.
/// </summary>
bool IsWellFormedUtf8(byte[] bytes)
{
    int len = bytes.Length;
    int i = 0;
    while (i < len)
    {
        byte lead = bytes[i++];
        if (lead < 0x80)
        {
            // Single-byte ASCII.
            continue;
        }

        // Number of continuation bytes the lead byte announces, plus the allowed range
        // of the FIRST continuation byte. That range is narrowed for the lead bytes
        // where the full 80..BF range would admit an ill-formed sequence.
        int trailing;
        int firstMin = 0x80;
        int firstMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            trailing = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            trailing = 2;
            if (lead == 0xE0)
            {
                firstMin = 0xA0; // E0 80..9F would be an overlong 3-byte form
            }
            else if (lead == 0xED)
            {
                firstMax = 0x9F; // ED A0..BF would encode a UTF-16 surrogate
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            trailing = 3;
            if (lead == 0xF0)
            {
                firstMin = 0x90; // F0 80..8F would be an overlong 4-byte form
            }
            else if (lead == 0xF4)
            {
                firstMax = 0x8F; // F4 90..BF would exceed U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (80..BF), overlong lead (C0, C1),
            // or a lead that can never appear in UTF-8 (F5..FF).
            return false;
        }

        if (len - i < trailing)
        {
            // The sequence is cut off by the end of the buffer.
            return false;
        }

        if (bytes[i] < firstMin || bytes[i] > firstMax)
        {
            return false;
        }

        for (int k = 1; k < trailing; k++)
        {
            // Remaining continuation bytes must look like 10xxxxxx.
            if ((bytes[i + k] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        i += trailing;
    }

    return true;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息