您的位置:首页 > 编程语言 > C#

[C#]文本处理的常用函数(TextProcess)

2015-11-05 17:01 435 查看
Version:3.01

这个设计版本比较完善了。

支持百度搜索服务(TextPro.TextApp.baiDu)。



using System;
using System.Text;
using System.Net;
using System.IO;

using System.Windows.Forms;

namespace TextProcess
{
public class TextPro
{
/*
本类属于文本处理集成的类。

Author:moodlee(小木头)

历史设计版本:
1、 1.01/2015年11月18日
2、 1.02/2015年11月19日
3、 2.01/2015年11月20日
4、 3.01/2015年11月23日

目录格式: Note/Index-Name
如:Note=Null(无) 用来标志层级关系 第一级:Null 第二级:@ 第三级:__
/=/ 可以用来表示层级关系,也可以用来排版,使层级关系一目了然 第一级:/ 第二级:// 第三级:///
Index=1 表示该元素第一次出现的历史设计版本是1,参考《历史设计版本》

/1-TextPro

@//1-Basic Text Process
__///1-Get Mid Text --- OK:2015年11月18日
__///1-Get Front Text --- OK:2015年11月18日
__///1-Get Back Text --- OK:2015年11月18日
__///1-Get Rand Text --- OK:2015年11月18日
__///1-Get List --- OK:2015年11月18日
__///1-Remove Key Text --- OK:2015年11月19日
__///1-Remove Twin Note --- OK:2015年11月19日
__///3-Get Text From File --- OK:2015年11月20日
__///3-Write Text In File --- OK:2015年11月20日
__///3-Add Text In File --- OK:2015年11月20日

@//1-Html Text Process
__///2-Get Html Text --- OK:2015年11月19日
__///1-Foreach Item --- OK:2015年11月20日/2015年11月23日
__///1-Get Aimed Item --- OK:2015年11月20日
__///1-Get Value Of Element --- OK:2015年11月20日

@//3-Translation Process
__///3-Get Translation --- OK:2015年11月20日/2015年11月23日
__///3-Get Origin --- OK:2015年11月20日
__///3-Uninstall --- OK:2015年11月20日

@//4-TextApplication
__///4-Baidu --- OK:2015年11月23日/2015年11月24日

@//4-Official Translation Process
__///4-CSharpTrans
__///4-UrlTrans --- OK:2015年11月23日
__///4-HtmlTrans

*/

/// <summary>
/// Basic text helpers: extracting text around delimiters, splitting, removing
/// keywords or delimited regions, random strings and whole-file read/write.
/// </summary>
/// <remarks>
/// All delimiter searches are ordinal. The <c>myReplace</c> parameters are kept
/// only so existing callers still compile; the methods no longer substitute a
/// placeholder character, so the data may freely contain that character.
/// </remarks>
public class BasicPro
{
    // One generator shared by every call so that calls made in quick succession
    // do not restart from the same seed. Access is serialised with a lock
    // because Random is not thread-safe.
    private static readonly Random sharedRandom = new Random();

    /// <summary>
    /// Returns the text that follows the first <paramref name="myFrontLimit"/>.
    /// The result ends at the next occurrence of <paramref name="myBackLimit"/>
    /// or of <paramref name="myFrontLimit"/>, whichever comes first, or at the
    /// end of the data when neither occurs.
    /// Example: getMid("12341234", "2", "4") returns "3".
    /// </summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myFrontLimit">Delimiter in front of the wanted text.</param>
    /// <param name="myBackLimit">Delimiter behind the wanted text.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The text in between, or an empty string when the front delimiter is absent.</returns>
    /// <exception cref="ArgumentException">A delimiter is null or empty.</exception>
    public string getMid(
        string myData,
        string myFrontLimit,
        string myBackLimit,
        char myReplace = '$')
    {
        requireDelimiter(myFrontLimit, "myFrontLimit");
        requireDelimiter(myBackLimit, "myBackLimit");

        int frontAt = myData.IndexOf(myFrontLimit, StringComparison.Ordinal);
        if (frontAt < 0)
        {
            return string.Empty;
        }

        int start = frontAt + myFrontLimit.Length;
        int end = myData.Length;

        // A second front delimiter also closes the segment.
        int nextFront = myData.IndexOf(myFrontLimit, start, StringComparison.Ordinal);
        if (nextFront >= 0)
        {
            end = nextFront;
        }

        int nextBack = myData.IndexOf(myBackLimit, start, StringComparison.Ordinal);
        if (nextBack >= 0 && nextBack < end)
        {
            end = nextBack;
        }

        return myData.Substring(start, end - start);
    }

    /// <summary>
    /// Returns the text in front of the first <paramref name="myLimit"/>.
    /// Example: getFront("1234512345", "4") returns "123".
    /// </summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myLimit">Delimiter.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The leading text, or the whole data when the delimiter is absent.</returns>
    /// <exception cref="ArgumentException">The delimiter is null or empty.</exception>
    public string getFront(
        string myData,
        string myLimit,
        char myReplace = '$')
    {
        requireDelimiter(myLimit, "myLimit");

        int limitAt = myData.IndexOf(myLimit, StringComparison.Ordinal);
        return limitAt < 0 ? myData : myData.Substring(0, limitAt);
    }

    /// <summary>
    /// Returns the text behind the last <paramref name="myLimit"/>.
    /// Example: getBack("1234512345", "4") returns "5".
    /// </summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myLimit">Delimiter.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The trailing text, or the whole data when the delimiter is absent.</returns>
    /// <exception cref="ArgumentException">The delimiter is null or empty.</exception>
    public string getBack(
        string myData,
        string myLimit,
        char myReplace = '$')
    {
        requireDelimiter(myLimit, "myLimit");

        // Walk the non-overlapping occurrences from left to right and remember
        // where the last one ends.
        int tailStart = 0;
        int hit = myData.IndexOf(myLimit, tailStart, StringComparison.Ordinal);
        while (hit >= 0)
        {
            tailStart = hit + myLimit.Length;
            hit = myData.IndexOf(myLimit, tailStart, StringComparison.Ordinal);
        }

        return myData.Substring(tailStart);
    }

    /// <summary>
    /// Builds a random string by drawing <paramref name="myCount"/> characters
    /// from <paramref name="myData"/>. Every character of the alphabet,
    /// including the last one, can be drawn.
    /// </summary>
    /// <param name="myData">Alphabet to draw from.</param>
    /// <param name="myCount">Number of characters to produce; values below 1 give an empty string.</param>
    /// <returns>The random string, e.g. "dnoi2j1" for the defaults.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Characters are requested from an empty alphabet.</exception>
    public string getRandText(
        string myData = "0123456789abcdefghijklnmopqrstuvwxyzABCDEFGHIJKLNMOPQRSTUVWXYZ",
        int myCount = 7)
    {
        int alphabetSize = myData.Length;
        if (myCount > 0 && alphabetSize == 0)
        {
            throw new ArgumentOutOfRangeException("myData", "The alphabet must contain at least one character.");
        }

        StringBuilder text = new StringBuilder(Math.Max(myCount, 0));
        lock (sharedRandom)
        {
            for (int i = 0; i < myCount; i++)
            {
                // The upper bound of Next is exclusive, so pass the full length.
                text.Append(myData[sharedRandom.Next(alphabetSize)]);
            }
        }

        return text.ToString();
    }

    /// <summary>Splits the data at every occurrence of <paramref name="mySplit"/>.</summary>
    /// <param name="myData">Source text.</param>
    /// <param name="mySplit">Separator.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The pieces, empty pieces included.</returns>
    /// <exception cref="ArgumentException">The separator is null or empty.</exception>
    public string[] getList(
        string myData,
        string mySplit,
        char myReplace = '$')
    {
        requireDelimiter(mySplit, "mySplit");
        return myData.Split(new[] { mySplit }, StringSplitOptions.None);
    }

    /// <summary>
    /// Splits the data at every occurrence of any separator. Separators are
    /// applied one after another in array order, each to the pieces produced
    /// by the previous ones.
    /// </summary>
    /// <param name="myData">Source text.</param>
    /// <param name="mySplit">Separators.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The pieces, empty pieces included.</returns>
    /// <exception cref="ArgumentException">The data is null, or a separator is null or empty.</exception>
    public string[] getList(
        string myData,
        string[] mySplit,
        char myReplace = '$')
    {
        if (myData == null)
        {
            throw new ArgumentNullException("myData");
        }

        System.Collections.Generic.List<string> pieces = new System.Collections.Generic.List<string>();
        pieces.Add(myData);

        foreach (string separator in mySplit)
        {
            requireDelimiter(separator, "mySplit");

            System.Collections.Generic.List<string> finer = new System.Collections.Generic.List<string>();
            foreach (string piece in pieces)
            {
                finer.AddRange(piece.Split(new[] { separator }, StringSplitOptions.None));
            }
            pieces = finer;
        }

        return pieces.ToArray();
    }

    /// <summary>Removes every occurrence of <paramref name="myKey"/> from the data.</summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myKey">Keyword to remove.</param>
    /// <returns>The data without the keyword.</returns>
    public string removeKey(
        string myData,
        string myKey)
    {
        return myData.Replace(myKey, string.Empty);
    }

    /// <summary>Removes every occurrence of each keyword, in array order.</summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myKeys">Keywords to remove.</param>
    /// <returns>The data without the keywords.</returns>
    public string removeKey(
        string myData,
        string[] myKeys)
    {
        string stripped = myData;
        foreach (string key in myKeys)
        {
            stripped = stripped.Replace(key, string.Empty);
        }
        return stripped;
    }

    /// <summary>
    /// Removes every region that starts with <paramref name="myFrontNote"/> and
    /// ends with the next <paramref name="myBackNote"/>, markers included.
    /// Example: removeTwinNote("a&lt;b&gt;c", "&lt;", "&gt;") returns "ac".
    /// </summary>
    /// <param name="myData">Source text.</param>
    /// <param name="myFrontNote">Opening marker.</param>
    /// <param name="myBackNote">Closing marker.</param>
    /// <param name="myReplace">Unused; kept for backward compatibility.</param>
    /// <returns>The data without the marked regions. An opening marker that is never closed is left in place.</returns>
    /// <exception cref="ArgumentException">A marker is null or empty.</exception>
    public string removeTwinNote(
        string myData,
        string myFrontNote,
        string myBackNote,
        char myReplace = '$')
    {
        requireDelimiter(myFrontNote, "myFrontNote");
        requireDelimiter(myBackNote, "myBackNote");

        string text = myData;
        int openAt = text.IndexOf(myFrontNote, StringComparison.Ordinal);
        while (openAt >= 0)
        {
            int closeAt = text.IndexOf(myBackNote, openAt + myFrontNote.Length, StringComparison.Ordinal);
            if (closeAt < 0)
            {
                // No closing marker after the first opening marker: nothing
                // more can be removed, so stop instead of looping forever.
                break;
            }

            text = text.Remove(openAt, closeAt + myBackNote.Length - openAt);

            // Rescan from the start: joining the two sides may form a new marker.
            openAt = text.IndexOf(myFrontNote, StringComparison.Ordinal);
        }

        return text;
    }

    /// <summary>Reads a whole file as text.</summary>
    /// <param name="myFileName">File name including its path.</param>
    /// <param name="myEncoding">Encoding used to decode the file.</param>
    /// <returns>The file content.</returns>
    public string getText(
        string myFileName,
        Encoding myEncoding)
    {
        using (StreamReader reader = new StreamReader(myFileName, myEncoding))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>Writes the data to a file, replacing any existing content.</summary>
    /// <param name="myData">Text to write.</param>
    /// <param name="myFileName">File name including its path.</param>
    /// <param name="myEncoding">Encoding used to encode the text.</param>
    public void writeText(
        string myData,
        string myFileName,
        Encoding myEncoding)
    {
        using (StreamWriter writer = new StreamWriter(myFileName, false, myEncoding))
        {
            writer.Write(myData);
        }
    }

    /// <summary>Appends the data to the end of a file.</summary>
    /// <param name="myData">Text to append.</param>
    /// <param name="myFileName">File name including its path.</param>
    /// <param name="myEncoding">Encoding used to encode the text.</param>
    public void addText(
        string myData,
        string myFileName,
        Encoding myEncoding)
    {
        using (StreamWriter writer = new StreamWriter(myFileName, true, myEncoding))
        {
            writer.Write(myData);
        }
    }

    // Rejects null or empty delimiters, which cannot be searched for.
    private static void requireDelimiter(string value, string parameterName)
    {
        if (value == null)
        {
            throw new ArgumentNullException(parameterName);
        }
        if (value.Length == 0)
        {
            throw new ArgumentException("The delimiter must not be empty.", parameterName);
        }
    }
}
/// <summary>
/// HTML helpers: downloading a page, listing the elements of a given tag and
/// reading attribute values from an element.
/// </summary>
public class HtmlPro
{
    // Text helper used by getValue.
    BasicPro global_BasicPro = new BasicPro();

    /// <summary>Downloads a page with an HTTP GET request.</summary>
    /// <param name="myUrl">Address of the page.</param>
    /// <param name="myEncoding">Encoding used to decode the response body.</param>
    /// <returns>The response body as text.</returns>
    public string getHtml(
        string myUrl,
        Encoding myEncoding)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(myUrl);
        request.Method = "GET";

        // using blocks release the connection even when reading fails.
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, myEncoding))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Lists the elements of tag <paramref name="myNote"/> found in the data,
    /// innermost elements first.
    /// </summary>
    /// <remarks>
    /// Each pass takes the text before the first closing tag and looks back to
    /// the last "&lt;" + myNote in it. The opening tag is matched by that prefix
    /// only, so a longer tag name that starts with myNote matches as well.
    /// A found element is then replaced everywhere in the working text, so
    /// identical elements are reported once. Scanning stops at an element with
    /// no content or at a closing tag that has no opening tag in front of it.
    /// </remarks>
    /// <param name="myData">HTML text.</param>
    /// <param name="myNote">Tag name, e.g. "div".</param>
    /// <param name="myContainParent">
    /// When true a found element is masked with placeholder tokens and stays
    /// inside its parent, so the parent is later reported with its children
    /// restored; when false the found element is removed from its parent.
    /// </param>
    /// <returns>The elements found, each including its opening and closing tag.</returns>
    public string[] foreachItem(
        string myData,
        string myNote,
        bool myContainParent = false)
    {
        string openTag = "<" + myNote;
        string closeTag = "</" + myNote + ">";
        System.Collections.Generic.List<string> found = new System.Collections.Generic.List<string>();

        TransPro escaper = new TransPro();
        try
        {
            string openMark = escaper.getTrans(openTag);
            string closeMark = escaper.getTrans(closeTag);
            string remaining = myData;

            while (true)
            {
                int closeAt = remaining.IndexOf(closeTag, StringComparison.Ordinal);
                if (closeAt < 0)
                {
                    break;
                }

                string head = remaining.Substring(0, closeAt);
                int openAt = head.LastIndexOf(openTag, StringComparison.Ordinal);
                if (openAt < 0)
                {
                    // Closing tag without an opening tag in front of it.
                    break;
                }

                string inner = head.Substring(openAt + openTag.Length);
                if (inner.Length == 0)
                {
                    break;
                }

                string item = openTag + inner + closeTag;

                // Children masked in an earlier pass get their real tags back.
                found.Add(item.Replace(openMark, openTag).Replace(closeMark, closeTag));

                string substitute = myContainParent
                    ? openMark + inner + closeMark
                    : string.Empty;
                remaining = remaining.Replace(item, substitute);
            }
        }
        finally
        {
            // Remove the escaper's temporary data even when scanning fails.
            escaper.unInstall();
        }

        return found.ToArray();
    }

    /// <summary>
    /// Returns the first element of tag <paramref name="myNote"/> whose
    /// attribute <paramref name="myElement"/> equals <paramref name="myValue"/>.
    /// </summary>
    /// <param name="myData">HTML text.</param>
    /// <param name="myNote">Tag name.</param>
    /// <param name="myElement">Attribute name.</param>
    /// <param name="myValue">Required attribute value.</param>
    /// <returns>The matching element, or an empty string when none matches.</returns>
    public string getAimedItem1(
        string myData,
        string myNote,
        string myElement,
        string myValue)
    {
        foreach (string item in foreachItem(myData, myNote, true))
        {
            if (getValue(item, myElement) == myValue)
            {
                return item;
            }
        }
        return string.Empty;
    }

    /// <summary>
    /// Returns the first element of tag <paramref name="myNote"/> whose
    /// attribute <paramref name="myElement"/> contains <paramref name="myKeyOfValue"/>.
    /// </summary>
    /// <param name="myData">HTML text.</param>
    /// <param name="myNote">Tag name.</param>
    /// <param name="myElement">Attribute name.</param>
    /// <param name="myKeyOfValue">Text the attribute value must contain.</param>
    /// <returns>The matching element, or an empty string when none matches.</returns>
    public string getAimedItem2(
        string myData,
        string myNote,
        string myElement,
        string myKeyOfValue)
    {
        foreach (string item in foreachItem(myData, myNote, true))
        {
            if (getValue(item, myElement).IndexOf(myKeyOfValue, StringComparison.Ordinal) != -1)
            {
                return item;
            }
        }
        return string.Empty;
    }

    /// <summary>
    /// Reads the value of attribute <paramref name="myElement"/> from the first
    /// tag of <paramref name="myItem"/>. Both <c>name="value"</c> and
    /// <c>name = "value"</c> spellings are recognised.
    /// </summary>
    /// <param name="myItem">Element text starting with its opening tag.</param>
    /// <param name="myElement">Attribute name.</param>
    /// <returns>The attribute value, or an empty string when the attribute is absent.</returns>
    public string getValue(
        string myItem,
        string myElement)
    {
        // Only the text of the first tag is searched for attributes.
        string tagText = global_BasicPro.getMid(myItem, "<", ">", '¥');
        string[] prefixes = { myElement + "=\"", myElement + " = \"" };

        foreach (string prefix in prefixes)
        {
            if (tagText.IndexOf(prefix, StringComparison.Ordinal) != -1)
            {
                return global_BasicPro.getMid(tagText, prefix, "\"");
            }
        }

        return string.Empty;
    }
}
/// <summary>
/// Maps keys to placeholder tokens ("translations") and back.
/// </summary>
/// <remarks>
/// A single special character maps to the fixed token "&amp;Trans:n&amp;", where n
/// is its 1-based position in the special-character list. Any other key gets a
/// random token of the form "&amp;x&amp;" (x is a 7-character random code) that is
/// recorded in a scratch file as an entry "_Key&lt;Token&gt;". Before a longer key
/// is recorded, the special characters inside it are rewritten as
/// "&amp;Trans:i&amp;" with i being the 0-based position; getOrigin reverses both
/// numbering schemes.
/// </remarks>
public class TransPro
{
    // Characters that are never stored verbatim in the scratch file.
    private static readonly string[] specialKeys = { "&", "$", "<", ">", "_", "=", "+", "(", ")", "{", "}" };

    // Folder that holds the scratch file. It lives under the user's temp
    // folder so that creating it needs no elevated rights.
    private readonly string tempDirectory;
    private string global_ExtraFile = string.Empty;
    private BasicPro global_BasicPro = new BasicPro();

    /// <summary>Creates the scratch folder and an empty, randomly named scratch file in it.</summary>
    public TransPro()
    {
        tempDirectory = Path.Combine(Path.GetTempPath(), "TextProcess.Temporary");
        Directory.CreateDirectory(tempDirectory);
        global_ExtraFile = Path.Combine(tempDirectory, global_BasicPro.getRandText() + ".txt");
        global_BasicPro.writeText(string.Empty, global_ExtraFile, Encoding.UTF8);
    }

    /// <summary>
    /// Deletes the scratch file and, when it is empty afterwards, the scratch
    /// folder. Cleanup is best effort: file-system failures are ignored.
    /// </summary>
    public void unInstall()
    {
        try
        {
            File.Delete(global_ExtraFile);
            // Non-recursive delete: fails while other scratch files remain.
            Directory.Delete(tempDirectory);
        }
        catch (IOException)
        {
            // Folder still in use or not empty; leave it.
        }
        catch (UnauthorizedAccessException)
        {
            // No permission to clean up; leave the data behind.
        }
    }

    /// <summary>Returns the placeholder token for a key, creating one on first use.</summary>
    /// <param name="myKey">Key to translate.</param>
    /// <returns>The fixed token for a single special character, otherwise the recorded random token.</returns>
    public string getTrans(
        string myKey)
    {
        if (myKey.Length == 1)
        {
            int position = Array.IndexOf(specialKeys, myKey);
            if (position >= 0)
            {
                // Single special characters are numbered from 1.
                return "&Trans:" + (position + 1).ToString() + "&";
            }
            return support_getTransExtra(myKey);
        }

        // Inside longer keys the special characters are numbered from 0.
        // "&" is rewritten first, so the "&" of tokens added afterwards is
        // not rewritten again.
        string escapedKey = myKey;
        for (int i = 0; i < specialKeys.Length; i++)
        {
            escapedKey = escapedKey.Replace(specialKeys[i], "&Trans:" + i.ToString() + "&");
        }
        return support_getTransExtra(escapedKey);
    }

    /// <summary>Returns the key that a placeholder token stands for.</summary>
    /// <param name="myTrans">Token previously returned by getTrans.</param>
    /// <returns>The original key, or an empty string when the token is unknown.</returns>
    public string getOrigin(
        string myTrans)
    {
        string origin = null;
        for (int i = 0; i < specialKeys.Length; i++)
        {
            if (myTrans == "&Trans:" + (i + 1).ToString() + "&")
            {
                origin = specialKeys[i];
                break;
            }
        }

        if (origin == null)
        {
            origin = support_getOriginExtra(myTrans);
        }

        // Restore the special characters that were rewritten inside a longer key.
        for (int i = 0; i < specialKeys.Length; i++)
        {
            origin = origin.Replace("&Trans:" + i.ToString() + "&", specialKeys[i]);
        }

        return origin;
    }

    // Looks the key up in the scratch file and records a new random token
    // when the key has no entry yet.
    private string support_getTransExtra(
        string myKey)
    {
        string index = global_BasicPro.getText(global_ExtraFile, Encoding.UTF8);

        // Match the complete entry head so that a key which is only the
        // beginning of a recorded key is not mistaken for it.
        string entryHead = "_" + myKey + "<";
        int headAt = index.IndexOf(entryHead, StringComparison.Ordinal);
        if (headAt >= 0)
        {
            int tokenAt = headAt + entryHead.Length;
            int tokenEnd = index.IndexOf('>', tokenAt);
            if (tokenEnd > tokenAt)
            {
                return index.Substring(tokenAt, tokenEnd - tokenAt);
            }
        }

        string token = "&" + global_BasicPro.getRandText() + "&";
        string entry = "_" + myKey + "<" + token + ">";
        global_BasicPro.addText(entry, global_ExtraFile, Encoding.UTF8);
        return token;
    }

    // Finds the scratch-file entry whose token is myTrans and returns its key,
    // or an empty string when there is no such entry.
    private string support_getOriginExtra(
        string myTrans)
    {
        string index = global_BasicPro.getText(global_ExtraFile, Encoding.UTF8);

        int tokenAt = index.IndexOf("<" + myTrans + ">", StringComparison.Ordinal);
        if (tokenAt < 0)
        {
            return string.Empty;
        }

        // The key is the text between the entry's leading "_" and the token.
        string head = index.Substring(0, tokenAt);
        return head.Substring(head.LastIndexOf('_') + 1);
    }
}
/// <summary>Applications built on the text helpers; currently a Baidu search.</summary>
public class TextApp
{
    // Helpers used by baiDu.
    OfficialTransPro supportOT = new OfficialTransPro();
    HtmlPro supportH = new HtmlPro();
    BasicPro supportB = new BasicPro();

    /// <summary>
    /// Runs a Baidu web search and returns the title and link of each result
    /// found on the requested page.
    /// </summary>
    /// <param name="myKey">Search keywords.</param>
    /// <param name="myPages">Result page to fetch, starting at 1.</param>
    /// <param name="mySite">Optional "site:" filter; ignored when empty.</param>
    /// <param name="myTitle">Optional "intitle:" filter; ignored when empty.</param>
    /// <param name="myUrl">Optional "inurl:" filter; ignored when empty.</param>
    /// <returns>
    /// One row per result: column 0 is the title with markup removed, column 1
    /// the link. Returns null as soon as a result block carries a numeric id
    /// below (myPages * 10 - 10) — presumably the served page is then not the
    /// requested one; TODO confirm.
    /// </returns>
    public string[,] baiDu(
        string myKey,
        int myPages = 1,
        string mySite = "",
        string myTitle = "",
        string myUrl = "")
    {
        int firstRank = myPages * 10 - 10;

        // Build the query text first and escape it as a whole, so that the
        // filter values cannot break the query string either.
        string query = myKey;
        if (mySite != "")
        {
            query += " site:" + mySite;
        }
        if (myTitle != "")
        {
            query += " intitle:" + myTitle;
        }
        if (myUrl != "")
        {
            query += " inurl:" + myUrl;
        }

        string address = "http://www.baidu.com/s?wd=" + supportOT.UrlTrans(query);
        if (myPages > 1)
        {
            address += "&pn=" + firstRank.ToString();
        }

        string page = supportH.getHtml(address, Encoding.UTF8);

        System.Collections.Generic.List<string> titles = new System.Collections.Generic.List<string>();
        System.Collections.Generic.List<string> links = new System.Collections.Generic.List<string>();

        foreach (string block in supportH.foreachItem(page, "div"))
        {
            // Only blocks whose id attribute is a number are treated as results.
            int rank;
            if (!int.TryParse(supportH.getValue(block, "id"), out rank))
            {
                continue;
            }
            if (rank < firstRank)
            {
                return null;
            }

            string[] anchors = supportH.foreachItem(block, "a");
            if (anchors.Length == 0)
            {
                // A block without a link has nothing to report.
                continue;
            }

            links.Add(supportH.getValue(anchors[0], "href"));
            titles.Add(supportB.removeTwinNote(anchors[0], "<", ">"));
        }

        string[,] results = new string[titles.Count, 2];
        for (int i = 0; i < titles.Count; i++)
        {
            results[i, 0] = titles[i];
            results[i, 1] = links[i];
        }
        return results;
    }
}
/// <summary>Escaping rules for well-known formats; currently URL text only.</summary>
public class OfficialTransPro
{
    /// <summary>
    /// Percent-encodes the characters % + space / ? # &amp; = in the given text.
    /// All other characters are copied unchanged.
    /// </summary>
    /// <param name="myUrl">Text to place inside a URL.</param>
    /// <returns>The text with the listed characters replaced by their %XX codes.</returns>
    public string UrlTrans(
        string myUrl)
    {
        StringBuilder encoded = new StringBuilder(myUrl.Length);

        foreach (char symbol in myUrl)
        {
            switch (symbol)
            {
                case '%':
                    encoded.Append("%25");
                    break;
                case '+':
                    encoded.Append("%2B");
                    break;
                case ' ':
                    encoded.Append("%20");
                    break;
                case '/':
                    encoded.Append("%2F");
                    break;
                case '?':
                    encoded.Append("%3F");
                    break;
                case '#':
                    encoded.Append("%23");
                    break;
                case '&':
                    encoded.Append("%26");
                    break;
                case '=':
                    encoded.Append("%3D");
                    break;
                default:
                    encoded.Append(symbol);
                    break;
            }
        }

        return encoded.ToString();
    }
}
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: