您的位置:首页 > 编程语言 > C#

C# 文件从utf8或系统默认格式转gbk写入新文件(支持有bom和无bom)

2015-11-16 17:13 525 查看
using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.IO;

namespace SqlConCat
{
    /// <summary>
    /// Console tool that concatenates every <c>.sql</c> file in the current
    /// directory into a single GBK-encoded file. Each input may be UTF-8 (with or
    /// without BOM), UTF-16 (with BOM) or the system default code page.
    /// </summary>
    class Program
    {
        // NOTE(review): the declarations of the output stream/writer were stripped
        // from the published snippet, so the original output file name is unknown.
        // This name is a placeholder - confirm before relying on it.
        private const string OutputFileName = "concat_gbk.sql";

        /// <summary>
        /// Guesses the text encoding of a file from its content.
        /// </summary>
        /// <param name="filename">Path of the file to inspect; the whole file is read.</param>
        /// <returns>
        /// <see cref="System.Text.Encoding.UTF8"/> for a UTF-8 BOM or for content that
        /// is structurally valid UTF-8 (including empty and pure-ASCII files),
        /// big/little-endian UTF-16 when the matching BOM is present, otherwise
        /// <see cref="System.Text.Encoding.Default"/>.
        /// </returns>
        public static System.Text.Encoding GetFileEncodeType(string filename)
        {
            // File.ReadAllBytes always releases the handle and reads the whole file,
            // unlike a bare FileStream/BinaryReader pair that leaks on exceptions.
            return DetectEncoding(File.ReadAllBytes(filename));
        }

        /// <summary>
        /// Guesses the text encoding of an in-memory file image.
        /// </summary>
        /// <param name="data">Raw file bytes, possibly empty.</param>
        /// <returns>The detected encoding; see <see cref="GetFileEncodeType"/>.</returns>
        private static Encoding DetectEncoding(byte[] data)
        {
            // BOM checks are length-safe, so files shorter than three bytes no
            // longer raise IndexOutOfRangeException.
            if (StartsWith(data, 0xEF, 0xBB, 0xBF))
            {
                return Encoding.UTF8;
            }

            // A UTF-16 BOM is exactly two bytes; the byte that follows belongs to the
            // text and must not take part in the check.
            if (StartsWith(data, 0xFE, 0xFF))
            {
                return Encoding.BigEndianUnicode;
            }

            if (StartsWith(data, 0xFF, 0xFE))
            {
                return Encoding.Unicode;
            }

            if (IsUTF8Bytes(data))
            {
                return Encoding.UTF8;
            }

            return Encoding.Default;
        }

        /// <summary>
        /// Tells whether <paramref name="data"/> begins with the given bytes.
        /// </summary>
        /// <param name="data">Buffer to test.</param>
        /// <param name="prefix">Expected leading bytes; an empty prefix always matches.</param>
        /// <returns><c>true</c> when every prefix byte matches.</returns>
        private static bool StartsWith(byte[] data, params byte[] prefix)
        {
            if (data.Length < prefix.Length)
            {
                return false;
            }

            for (int i = 0; i < prefix.Length; i++)
            {
                if (data[i] != prefix[i])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns how many leading bytes of <paramref name="data"/> are the byte
        /// order mark of <paramref name="encoding"/> (0 when there is none).
        /// </summary>
        /// <param name="data">Raw file bytes.</param>
        /// <param name="encoding">Encoding whose preamble is looked for.</param>
        /// <returns>Number of bytes to skip before decoding.</returns>
        private static int GetPreambleLength(byte[] data, Encoding encoding)
        {
            byte[] preamble = encoding.GetPreamble();
            return StartsWith(data, preamble) ? preamble.Length : 0;
        }

        /// <summary>
        /// Checks whether the bytes form structurally valid UTF-8 (no BOM required).
        /// </summary>
        /// <remarks>
        /// Only the lead-byte/continuation-byte structure is verified. As in the
        /// original implementation, lead bytes announcing sequences of up to six
        /// bytes are accepted and overlong forms are not rejected.
        /// </remarks>
        /// <param name="data">Bytes to examine.</param>
        /// <returns>
        /// <c>true</c> when every multi-byte sequence is complete and well formed;
        /// <c>false</c> otherwise, including when the data ends in the middle of a
        /// sequence (that case used to throw a bare <see cref="Exception"/>).
        /// </returns>
        private static bool IsUTF8Bytes(byte[] data)
        {
            int pendingContinuations = 0; // continuation bytes still owed by the current character

            for (int i = 0; i < data.Length; i++)
            {
                byte current = data[i];

                if (pendingContinuations > 0)
                {
                    // Inside a multi-byte character: every byte must look like 10xxxxxx.
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }

                    pendingContinuations--;
                    continue;
                }

                if (current < 0x80)
                {
                    continue; // plain ASCII
                }

                // The number of leading 1 bits of a lead byte is the sequence length
                // (110xxxxx = 2 ... 1111110x = 6).
                int leadingOnes = 0;
                for (int mask = 0x80; mask != 0 && (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // A single leading 1 is a stray continuation byte; more than six is
                // not UTF-8 at all.
                if (leadingOnes < 2 || leadingOnes > 6)
                {
                    return false;
                }

                pendingContinuations = leadingOnes - 1;
            }

            // Data that stops in the middle of a character is simply not UTF-8.
            return pendingContinuations == 0;
        }

        /// <summary>
        /// Appends every <c>.sql</c> file of the current directory to one GBK-encoded
        /// output file. Each file is preceded by a <c>-- name</c> comment line, and
        /// its detected encoding is echoed to the console.
        /// </summary>
        static void ConCatFile()
        {
            try
            {
                // Target encoding.
                // NOTE(review): on .NET Core / .NET 5+ the "gbk" code page may need a
                // code-pages encoding provider to be registered first - confirm for
                // the target runtime.
                Encoding gbk = Encoding.GetEncoding("gbk");

                // Working directory that is scanned for input files.
                string currentPath = System.Environment.CurrentDirectory;
                DirectoryInfo dir = new DirectoryInfo(currentPath);

                // Snapshot the listing before the output file is (re)created.
                FileInfo[] inf = dir.GetFiles();
                string outputPath = Path.Combine(currentPath, OutputFileName);

                // using blocks flush and close the output even when a file fails midway.
                using (FileStream newFile = new FileStream(outputPath, FileMode.Create, FileAccess.Write))
                using (StreamWriter newWriter = new StreamWriter(newFile, gbk))
                {
                    foreach (FileInfo finf in inf)
                    {
                        // Only files whose extension is ".sql" take part.
                        if (!finf.Extension.Equals(".sql", StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        // Never feed the result of a previous run back into itself.
                        if (finf.Name.Equals(OutputFileName, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        // Read once and detect from memory instead of re-reading the
                        // file for every encoding comparison.
                        byte[] bytes = File.ReadAllBytes(finf.FullName);
                        Encoding sourceEncoding = DetectEncoding(bytes);

                        newWriter.WriteLine("\r\n-- " + finf.Name);
                        Console.WriteLine(sourceEncoding.ToString() + " " + finf.Name);

                        // Skip the BOM so it is not converted into a stray '?' at the
                        // start of the GBK output; the writer re-encodes the text as GBK.
                        int offset = GetPreambleLength(bytes, sourceEncoding);
                        newWriter.Write(sourceEncoding.GetString(bytes, offset, bytes.Length - offset));
                    }
                }
            }
            catch (DirectoryNotFoundException)
            {
                Console.WriteLine("文件路径不对");
            }
        }

        /// <summary>
        /// Program entry point; runs the concatenation in the current directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            ConCatFile();
        }
    }
}

主要注意 byte[] utf8b = Encoding.Convert(utf8, gbk, bytes, 3, bytes.Length - 3); 的用法:转换时要跳过 BOM 头的三个字节(EF BB BF),否则输出文件的开头会多出一个问号。

以上代码经过删减,可能无法直接运行,请自行修改。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: