如何自动识别 URL 中的中文参数是 GB2312 还是 UTF-8 编码?
2014-08-20 15:15
561 查看
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using System.Text.RegularExpressions;
namespace ConsoleApplication2 {
    /// <summary>
    /// Console demo that guesses whether the percent-escaped bytes in a URL are
    /// UTF-8 or GB2312 and decodes the URL with the guessed encoding.
    /// </summary>
    /// <remarks>
    /// Detection works on raw bytes: the URL is first decoded as ISO-8859-1, so
    /// every percent-escaped byte becomes exactly one char with the same numeric
    /// value, and that string is matched against a byte-level UTF-8 shape pattern.
    /// NOTE(review): on runtimes where GB2312 is not a built-in code page
    /// (e.g. .NET Core / .NET 5+), Encoding.GetEncoding("GB2312") needs a
    /// registered code-pages encoding provider - confirm for the target runtime.
    /// </remarks>
    class Program {
        /// <summary>
        /// Short pattern: only ASCII bytes and 3-byte UTF-8 sequences (lead byte
        /// 0xE0-0xEF followed by two continuation bytes 0x80-0xBF). Use it when
        /// Latin, Greek and other 2-byte scripts do not need to be recognised.
        /// </summary>
        private static readonly Regex AsciiOrThreeByteUtf8 = new Regex(
            @"^(?:[\x00-\x7f]|[\xe0-\xef][\x80-\xbf]{2})+$");

        /// <summary>
        /// Long pattern: ASCII bytes plus every lead-byte/continuation-byte shape
        /// from 2 up to 6 bytes. It is shape-only, so it also accepts some byte
        /// pairs that are really GB2312 characters.
        /// </summary>
        private static readonly Regex AnyUtf8Shape = new Regex(
            @"^(?:[\x00-\x7f]|[\xfc-\xff][\x80-\xbf]{5}|[\xf8-\xfb][\x80-\xbf]{4}|[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc0-\xdf][\x80-\xbf])+$");

        /// <summary>
        /// Decodes a URL, treating it as UTF-8 only when its bytes consist solely
        /// of ASCII and 3-byte UTF-8 sequences; otherwise decodes it as GB2312.
        /// </summary>
        /// <param name="uriString">The percent-encoded URL or URL fragment.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uriString"/> is null.</exception>
        public static string DecodeURL2(String uriString) {
            return DecodeWithDetection(uriString, AsciiOrThreeByteUtf8);
        }

        /// <summary>
        /// Decodes a URL, treating it as UTF-8 when its bytes match any UTF-8
        /// sequence shape (2 to 6 bytes); otherwise decodes it as GB2312.
        /// </summary>
        /// <param name="uriString">The percent-encoded URL or URL fragment.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uriString"/> is null.</exception>
        public static string DecodeURL(String uriString) {
            return DecodeWithDetection(uriString, AnyUtf8Shape);
        }

        // Shared implementation: picks UTF-8 or GB2312 based on whether the raw
        // bytes of the URL match the supplied UTF-8 shape pattern.
        private static string DecodeWithDetection(string uriString, Regex utf8Shape) {
            if (uriString == null) {
                throw new ArgumentNullException("uriString");
            }

            // ISO-8859-1 maps each escaped byte to the char with the same value,
            // which lets the pattern's \xNN ranges test bytes directly.
            string rawBytes = HttpUtility.UrlDecode(uriString, Encoding.GetEncoding("iso-8859-1"));
            string charsetName = utf8Shape.IsMatch(rawBytes) ? "UTF-8" : "GB2312";
            return HttpUtility.UrlDecode(uriString, Encoding.GetEncoding(charsetName));
        }

        /// <summary>
        /// Prints the decoding results for a few GB2312 and UTF-8 encoded samples,
        /// then waits for a key press.
        /// </summary>
        public static void Main(string[] args) {
            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL(".net%bc%bc%ca%f5"));
            Console.WriteLine(DecodeURL(".net%e6%8a%80%e6%9c%af"));
            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL("%B8%A7%CB%B3%C7%E0%CB%C9%D2%A9%D2%B5"));
            Console.WriteLine(DecodeURL("%E6%8A%9A%E9%A1%BA%E9%9D%92%E6%9D%BE%E8%8D%AF%E4%B8%9A"));
            Console.WriteLine("------------------↓↓↓下面的出问题↓↓↓------------------");
            Console.WriteLine(DecodeURL("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL("%C1%AA%CD%A8")); // encoding is misdetected
            // There is no good fix for this misdetection: the byte ranges of UTF-8
            // and GBK overlap. Notepad is known to show the same problem.
            Console.WriteLine("------------------↑↑↑上面的出问题↑↑↑------------------");
            Console.WriteLine(DecodeURL2("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL2("%C1%AA%CD%A8")); // not misdetected
            Console.WriteLine("----------------------------------------------");
            Console.ReadKey();
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace ConsoleApplication2 {
    /// <summary>
    /// Console demo that guesses whether the percent-escaped bytes in a URL are
    /// UTF-8 or GB2312 and decodes the URL with the guessed encoding.
    /// </summary>
    /// <remarks>
    /// Detection works on raw bytes: the URL is first decoded as ISO-8859-1, so
    /// every percent-escaped byte becomes exactly one char with the same numeric
    /// value, and that string is matched against a byte-level UTF-8 shape pattern.
    /// NOTE(review): on runtimes where GB2312 is not a built-in code page
    /// (e.g. .NET Core / .NET 5+), Encoding.GetEncoding("GB2312") needs a
    /// registered code-pages encoding provider - confirm for the target runtime.
    /// </remarks>
    class Program {
        /// <summary>
        /// Short pattern: only ASCII bytes and 3-byte UTF-8 sequences (lead byte
        /// 0xE0-0xEF followed by two continuation bytes 0x80-0xBF). Use it when
        /// Latin, Greek and other 2-byte scripts do not need to be recognised.
        /// </summary>
        private static readonly Regex AsciiOrThreeByteUtf8 = new Regex(
            @"^(?:[\x00-\x7f]|[\xe0-\xef][\x80-\xbf]{2})+$");

        /// <summary>
        /// Long pattern: ASCII bytes plus every lead-byte/continuation-byte shape
        /// from 2 up to 6 bytes. It is shape-only, so it also accepts some byte
        /// pairs that are really GB2312 characters.
        /// </summary>
        private static readonly Regex AnyUtf8Shape = new Regex(
            @"^(?:[\x00-\x7f]|[\xfc-\xff][\x80-\xbf]{5}|[\xf8-\xfb][\x80-\xbf]{4}|[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc0-\xdf][\x80-\xbf])+$");

        /// <summary>
        /// Decodes a URL, treating it as UTF-8 only when its bytes consist solely
        /// of ASCII and 3-byte UTF-8 sequences; otherwise decodes it as GB2312.
        /// </summary>
        /// <param name="uriString">The percent-encoded URL or URL fragment.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uriString"/> is null.</exception>
        public static string DecodeURL2(String uriString) {
            return DecodeWithDetection(uriString, AsciiOrThreeByteUtf8);
        }

        /// <summary>
        /// Decodes a URL, treating it as UTF-8 when its bytes match any UTF-8
        /// sequence shape (2 to 6 bytes); otherwise decodes it as GB2312.
        /// </summary>
        /// <param name="uriString">The percent-encoded URL or URL fragment.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uriString"/> is null.</exception>
        public static string DecodeURL(String uriString) {
            return DecodeWithDetection(uriString, AnyUtf8Shape);
        }

        // Shared implementation: picks UTF-8 or GB2312 based on whether the raw
        // bytes of the URL match the supplied UTF-8 shape pattern.
        private static string DecodeWithDetection(string uriString, Regex utf8Shape) {
            if (uriString == null) {
                throw new ArgumentNullException("uriString");
            }

            // ISO-8859-1 maps each escaped byte to the char with the same value,
            // which lets the pattern's \xNN ranges test bytes directly.
            string rawBytes = HttpUtility.UrlDecode(uriString, Encoding.GetEncoding("iso-8859-1"));
            string charsetName = utf8Shape.IsMatch(rawBytes) ? "UTF-8" : "GB2312";
            return HttpUtility.UrlDecode(uriString, Encoding.GetEncoding(charsetName));
        }

        /// <summary>
        /// Prints the decoding results for a few GB2312 and UTF-8 encoded samples,
        /// then waits for a key press.
        /// </summary>
        public static void Main(string[] args) {
            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL(".net%bc%bc%ca%f5"));
            Console.WriteLine(DecodeURL(".net%e6%8a%80%e6%9c%af"));
            Console.WriteLine("----------------------------------------------");
            Console.WriteLine(DecodeURL("%B8%A7%CB%B3%C7%E0%CB%C9%D2%A9%D2%B5"));
            Console.WriteLine(DecodeURL("%E6%8A%9A%E9%A1%BA%E9%9D%92%E6%9D%BE%E8%8D%AF%E4%B8%9A"));
            Console.WriteLine("------------------↓↓↓下面的出问题↓↓↓------------------");
            Console.WriteLine(DecodeURL("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL("%C1%AA%CD%A8")); // encoding is misdetected
            // There is no good fix for this misdetection: the byte ranges of UTF-8
            // and GBK overlap. Notepad is known to show the same problem.
            Console.WriteLine("------------------↑↑↑上面的出问题↑↑↑------------------");
            Console.WriteLine(DecodeURL2("%E8%81%94%E9%80%9A")); // decoded correctly
            Console.WriteLine(DecodeURL2("%C1%AA%CD%A8")); // not misdetected
            Console.WriteLine("----------------------------------------------");
            Console.ReadKey();
        }
    }
}
相关文章推荐
- 自动识别判断url中的中文参数是GB2312还是Utf-8编码
- 判断URL中的中文参数是GB2312还是Utf-8编码
- URL参数GB2312和UTF-8编码 自动识别
- 十分钟内学会:自动识别GB2312与UTF-8编码的文件
- 前台页面使用utf-8编码,url中包含中文参数时后台获取参数乱码问题解决
- php识别中文编码并自动转换为UTF-8
- 如何自动判断url中汉字的编码格式
- php中文乱码怎么办如何让浏览器自动识别utf-8
- PHP自动判断字符串是gb2312还是utf8编码
- 如何自动判断Request.QueryString参数的编码格式
- 黄聪:PHP自动判断字符串是gb2312还是utf8编码
- html编码常见的有utf-8和gb2312编码等,应该如何判断选择?
- 如何判断一个文件的编码格式是gb2312还是gbk等
- 黄聪:C#中WebClient自动判断编码是UTF-8还是GBK,并且有超时判断功能
- 判断一段文件是UTF-8编码还是GB2312的编码方式
- URL传递的参数是UTF-8编码,在打开的页面正常显示(GB2312)的方法
- html编码常见的有utf-8和gb2312编码等,应该如何判断选择?
- QT4.6中文GB2312与utf-8和unicode及URL encoding %编码汇总
- 如何获取一个URL传参编码是UTF-8还是GBK的取巧解决方法
- ASP.NET自动识别GB2312与UTF-8编码的文件