您的位置:首页 > 移动开发 > Objective-C

用iTextSharp读取PDF文档中文本内容的探索

2009-09-21 10:20 330 查看
以下C#程序可以读取大多数PDF文档中的文本内容。

 

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using iTextSharp;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.xml;

namespace PDFR
{
    /// <summary>
    /// Main window of the sample: one button extracts the text of a PDF file
    /// into a text file, the other closes the application.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Handler of the "exit" button: terminates the application.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Application.Exit();
        }

        /// <summary>
        /// Handler of the "start" button: asks for a PDF file, extracts its text
        /// with <see cref="readPDF"/> and writes the result next to the PDF,
        /// using the same file name with a ".txt" extension.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog dialog = new OpenFileDialog())
            {
                dialog.Filter = "PDF files (*.pdf)|*.pdf|All files (*.*)|*.*";
                if (dialog.ShowDialog(this) != DialogResult.OK)
                {
                    return;
                }

                string pdfStr = readPDF(dialog.FileName);

                // Never write back to the input path: that would destroy the PDF.
                string outputPath = System.IO.Path.ChangeExtension(dialog.FileName, ".txt");
                System.IO.File.WriteAllText(outputPath, pdfStr);
            }
        }

        /// <summary>
        /// Extracts the text of every page of a PDF file by scanning the raw page
        /// content for "[...]" arrays and collecting the "(...)" literal strings
        /// found inside them.
        /// </summary>
        /// <param name="fn">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated literal strings of all pages, with every run of two
        /// spaces replaced by a CR/LF line break.
        /// </returns>
        /// <remarks>
        /// Hexadecimal strings ("&lt;...&gt;") inside the arrays are not decoded,
        /// so text shown that way is missing from the result.
        /// </remarks>
        private string readPDF(string fn)
        {
            StringBuilder extracted = new StringBuilder();

            PdfReader p = new PdfReader(fn);
            try
            {
                // Total number of pages; page numbers are 1-based.
                int pg = p.NumberOfPages;

                for (int i = 1; i <= pg; i++)
                {
                    // Raw content bytes of page i.
                    byte[] b = p.GetPageContent(i);
                    if (b == null || b.Length == 0)
                    {
                        continue;
                    }

                    // For research only: dump the raw bytes of each page to a file.
                    //System.IO.File.WriteAllBytes(Application.StartupPath + "\\P" + i.ToString() + ".txt", b);

                    // Turn every byte into the character with the same code so the
                    // content can be searched as a string.
                    StringBuilder sb = new StringBuilder(b.Length);
                    for (int j = 0; j < b.Length; j++)
                    {
                        sb.Append((char)b[j]);
                    }

                    AppendArrayText(sb.ToString(), extracted);
                }
            }
            finally
            {
                // Release the file even when a page cannot be read.
                p.Close();
            }

            // The extracted data carries no line breaks; two consecutive spaces are
            // used as the line separator (adjust to three if that fits better).
            return extracted.ToString().Replace("  ", "\r\n");
        }

        /// <summary>
        /// Appends to <paramref name="output"/> the text of every literal string
        /// located inside a "[...]" array of <paramref name="content"/>.
        /// An array without a closing "]" is scanned up to the end of the content
        /// instead of raising an exception.
        /// </summary>
        private static void AppendArrayText(string content, StringBuilder output)
        {
            int pos = 0;
            while (pos < content.Length)
            {
                int open = content.IndexOf('[', pos);
                if (open == -1)
                {
                    break;
                }

                pos = open + 1;
                bool closed = false;
                while (!closed && pos < content.Length)
                {
                    char c = content[pos];
                    if (c == ']')
                    {
                        closed = true;
                        pos++;
                    }
                    else if (c == '(')
                    {
                        // Continue after the string so a "]" inside it is not
                        // mistaken for the end of the array.
                        pos = AppendLiteralString(content, pos + 1, output);
                    }
                    else
                    {
                        pos++;
                    }
                }
            }
        }

        /// <summary>
        /// Reads one PDF literal string whose opening "(" sits just before
        /// <paramref name="start"/>, appends its decoded text to
        /// <paramref name="output"/> and returns the index just past the closing ")".
        /// Backslash escapes and balanced nested parentheses are honoured; an
        /// unterminated string contributes the characters read up to the end.
        /// </summary>
        private static int AppendLiteralString(string content, int start, StringBuilder output)
        {
            int depth = 1;
            int pos = start;

            while (pos < content.Length)
            {
                char c = content[pos++];

                if (c == '\\')
                {
                    if (pos >= content.Length)
                    {
                        break;
                    }

                    char escaped = content[pos++];
                    switch (escaped)
                    {
                        case 'n': output.Append('\n'); break;
                        case 'r': output.Append('\r'); break;
                        case 't': output.Append('\t'); break;
                        case 'b': output.Append('\b'); break;
                        case 'f': output.Append('\f'); break;
                        case '\n':
                            // Backslash + end of line: line continuation, no output.
                            break;
                        case '\r':
                            // Same, for CR or CR/LF line ends.
                            if (pos < content.Length && content[pos] == '\n')
                            {
                                pos++;
                            }
                            break;
                        default:
                            if (escaped >= '0' && escaped <= '7')
                            {
                                // Octal character code of one to three digits.
                                int code = escaped - '0';
                                int digits = 1;
                                while (digits < 3 && pos < content.Length
                                       && content[pos] >= '0' && content[pos] <= '7')
                                {
                                    code = code * 8 + (content[pos] - '0');
                                    pos++;
                                    digits++;
                                }
                                output.Append((char)(code & 0xFF));
                            }
                            else
                            {
                                // "\(", "\)", "\\" and unknown escapes: keep the character.
                                output.Append(escaped);
                            }
                            break;
                    }
                }
                else if (c == '(')
                {
                    depth++;
                    output.Append(c);
                }
                else if (c == ')')
                {
                    depth--;
                    if (depth == 0)
                    {
                        return pos;
                    }
                    output.Append(c);
                }
                else
                {
                    output.Append(c);
                }
            }

            return pos;
        }
    }
}
读取一个24页的文件,这个文件中不包含中文(包含中文的还需要支持中文的插件)。以下是第一页读取到的字节数组:
BT
/F1 12 Tf
1 0 0 1 70.944 758.74 Tm
0 g
0 G
[(Re)6(ad)-3(in)-5(g 1)] TJ
ET
BT
1 0 0 1 122.66 758.74 Tm
[( )] TJ
ET
BT
/F2 10.56 Tf
1 0 0 1 70.944 743.86 Tm
[( )] TJ
ET
BT
/F3 12 Tf
1 0 0 1 70.944 727.78 Tm
[(W)103(arming)] TJ
ET
BT
1 0 0 1 114.38 727.78 Tm
[(-)] TJ
ET
BT
1 0 0 1 118.34 727.78 Tm
[(up )-9(Que)6(sti)-4(ons)] TJ
ET
BT
1 0 0 1 181.46 727.78 Tm
[(:)] TJ
ET
BT
1 0 0 1 185.54 727.78 Tm
[( )] TJ
ET
BT
/F2 10.56 Tf
1 0 0 1 70.944 712.66 Tm
[( )] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 696.58 Tm
[(Do )-217(y)20(ou )-209(a)-5(g)10(re)-2(e)4( )-199(with )-201(p)-9(e)4(o)-9(ple)] TJ
ET
BT
1 0 0 1 204.77 696.58 Tm
[(, )-199(ma)-17(y)20(be)-5( )-199(to )-201(a)-5( )-199(c)-5(e)4(rta)5(in)-11( )-199(e)4(x)-9(tent,)] TJ
ET
BT
1 0 0 1 346.75 696.58 Tm
[( )] TJ
ET
BT
1 0 0 1 352.15 696.58 Tm
[(who )-197(c)-5(riticiz)-6(e)4( )-199(moder)5(n )-199(sc)3(i)-11(e)4(nc)4(e)-5( )-199(a)4(nd)-9( )] TJ
ET
BT
1 0 0 1 70.944 680.98 Tm
[(tec)6(hnolog)-11(y)20( )-29(a)4(s )-20(the )-17(sourc)-3(e)4( )-29(of)3( )-19(human )-17(suf)22(f)-6(e)4(rin)-8(g)10(?)] TJ
ET
BT
1 0 0 1 290.93 680.98 Tm
[( )] TJ
ET
BT
1 0 0 1 294.05 680.98 Tm
[(W)-5(h)10(a)4(t )-21(doe)4(s )-20(the )-17(pr)3(e)-5(fe)7(r)-6(e)4(nc)-5(e)4( )-19(of)3( )-29(li)-3(ving)8( )-19(in )-21(a)4( )-19(c)4(ountr)-18(y)10( )] TJ
ET
BT
1 0 0 1 70.944 665.38 Tm
[(e)4(nvironmen)3(t )-321(indi)-3(c)4(a)4(te?)] TJ
ET
BT
1 0 0 1 181.82 665.38 Tm
[( )] TJ
ET
BT
1 0 0 1 188.78 665.38 Tm
[(I)13(s )-320(that )-319(a)4(lso )-322(a)4( )-319(p)-9(re)7(f)-6(e)4(r)] TJ
ET
BT
1 0 0 1 295.97 665.38 Tm
[(e)4(n)-9(c)-5(e)4( )-319(of)3( )-319(und)-9(e)4(ve)4(loped )-317(n)-9(a)4(tur)-8(e)4( )-319(to )-321(sc)3(ien)-7(c)4(e)4( )-329(a)4(nd )] TJ
ET
BT
1 0 0 1 70.944 649.78 Tm
[(tec)6(hnolog)-11(y)30(?)] TJ
ET
BT
1 0 0 1 129.62 649.78 Tm
[( )] TJ
ET
BT
/F1 12 Tf
1 0 0 1 296.69 633.94 Tm
[( )] TJ
ET
BT
1 0 0 1 236.57 618.34 Tm
[(T)-2(h)-3(e)4( Hazar)6(d)-3(s of)-7( S)-3(c)4(ience)] TJ
ET
BT
1 0 0 1 356.95 618.34 Tm
[( )] TJ
ET
BT
/F3 9 Tf
1 0 0 1 265.01 604.06 Tm
[(By L)-5(e)4(wis )11(T)-3(h)7(o)-6(ma)-4(s)] TJ
ET
BT
1 0 0 1 328.39 604.06 Tm
[( )] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 587.35 Tm
[( )] TJ
ET
BT
/F4 12 Tf
1 0 0 1 70.944 571.75 Tm
[<0037004B0048>5<0003>-9<0046>4<005200470048>4<0003>-9<005A0052>-7<005500470003>-6<004900520055>6<0003>-19<0046>4<0055004C0057004C0046004C00560050>-3<0003>-9<00520049>3<0003>-9<00560046>3<004C004800510046>6<0048>4<0003>-9<0044>4<005100470003>-9<0056>-10<0046>4<004C004800510057004C0056>-2<005700560003>-12<0057004B004800560048>5<0003>-9<00470044>-5<005C>20<00560003>-10<004C00560003>-12<00B3>4<004B00580045>-9<0055004C0056001100B4>4<0003>-9<003200510046>6<0048>4<0003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 466.78 571.75 Tm
[(y)] TJ
ET
BT
/F4 12 Tf
1 0 0 1 472.54 571.75 Tm
[<00520058>-9<00B600590048>7<0003>-9<00560044>3<004C>-11<0047>-9<0003>] TJ
ET
BT
1 0 0 1 70.944 556.15 Tm
[<0057004B004400570003>-269<005A00520055>5<0047000F0003>-289<005C>20<0052005800B600590048>7<0003>-269<0056>-10<0044>4<004C00470003>-271<004C0057>-3<0003>-269<0044>4<004F004F>-3<001E0003>-271<004C0057>-3<0003>-269<0056005800500056>-3<0003>-269<00580053000F0003>-269<004C00510003>-261<0044>4<0003>-269<005A00520055>5<0047000F0003>-269<0044>4<004F004F>-3<0003>-269<00520049>3<0003>-269<005700520047>-11<0044>-15<005C>] TJ
ET
BT
1 0 0 1 415.99 556.15 Tm
[<00B6>63<00560003>-280<0044>4<0053005300550048>7<004B0048>4<00510056004C00520051>-2<00560003>-280<0044>4<00510047>-9<0003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 540.55 Tm
[(mi)-3(sg)9(ivi)-3(ng)10(s )-50(in )-51(the )-47(publi)-3(c)4( )-39(mi)-3(nd)] TJ
ET
BT
1 0 0 1 216.53 540.55 Tm
[( )] TJ
ET
BT
/F4 12 Tf
1 0 0 1 220.13 540.55 Tm
[<00B2>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 232.13 540.55 Tm
[( )] TJ
ET
BT
1 0 0 1 235.73 540.55 Tm
[(no)10(t )-51(just)5( )-49(a)4(bout )-51(w)12(ha)4(t )-51(is )-52(pe)4(rc)7(e)4(ived )-47(a)4(s )-50(the )-47(insuf)20(fe)7(r)-6(a)4(ble )-47(a)4(tt)-3(it)-3(ude)4( )] TJ
ET
BT
1 0 0 1 70.944 524.95 Tm
[(of)3( )-19(the )-17(s)-10(c)4(ientis)-2(ts )-22(themse)3(lves, )-18(but, )-21(e)4(n)-9(c)4(losed )-18(in )-21(t)] TJ
ET
BT
1 0 0 1 291.77 524.95 Tm
[(h)] TJ
ET
BT
1 0 0 1 297.89 524.95 Tm
[(e)4( )-19(s)-10(a)4(me )-17(wor)5(d,)-9( )-19(wh)-7(a)4(t )-21(sc)3(ien)-7(c)4(e)4( )-29(a)4(nd )-19(tec)6(hnolo)-11(g)-9(y)20( )-29(a)4(r)-6(e)-5( )] TJ
ET
BT
1 0 0 1 70.944 509.35 Tm
[(pe)4(rc)7(e)4(iv)-11(e)4(d to be doin)-9(g)10( to m)-3(a)4(ke)4( thi)-3(s ce)7(ntur)-8(y)80(, thi)] TJ
ET
BT
1 0 0 1 292.97 509.35 Tm
[(s ne)-6(a)4(r to it)-2(s end)3(ing)8(, turn out )-10(so wr)4(ong)] TJ
ET
BT
1 0 0 1 474.7 509.35 Tm
[(.)] TJ
ET
BT
1 0 0 1 478.18 509.35 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 493.75 Tm
[( )] TJ
ET
BT
/F4 12 Tf
1 0 0 1 70.944 478.15 Tm
[<00B3>4<002B005800450055>5<004C005600B40003>-18<004C00560003>-32<0044>4<0003>-19<00530052005A0048>-3<00550049>6<0058004F0003>-21<005A>-7<00520055>3<0047000F0003>-19<0046>4<0052005100570044004C0051004C0051>-11<004A>10<0003>-19<004F0044>-17<005C>20<0048>-5<005500560003>-17<00520049>3<0003>-29<00530052005A0048>6<0055>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 343.99 478.15 Tm
[(ful )-18(mea)6(nin)-11(g)10(, )-19(de)4(riv)-8(e)4(d )-19(fr)6(om )-31(a)4( )-19(ve)4(r)-16(y)20( )-29(old)-11( )] TJ
ET
BT
1 0 0 1 70.944 462.55 Tm
[(wor)5(d, )-49(but )-51(with )-51(a)4( )-49(ne)4(w )-47(li)-3(fe)7( )-49(of)3( )-49(it)-3(s )-50(own, )-47(g)10(row)5(in)-11(g)10( )-49(wa)-3(y)20( )-59(be)-15(y)20(ond )-49(the )-47(li)-3(mi)-3(ts )-52(of)3( )-49(it)-3(s)9( )-49(or)3(ig)8(inal )-49(mea)6(nin)-11(g)10(. )] TJ
ET
BT
1 0 0 1 70.944 446.95 Tm
[(T)71(oda)-5(y)80(, )-119(it)-3( )-119(is )-122(strong )-119(e)4(nou)-9(g)10(h )-119(t)] TJ
ET
BT
1 0 0 1 208.49 446.95 Tm
[(o )-119(c)-5(a)4(rr)-13(y)20( )-119(th)-11(e)4( )-119(full )-120(we)6(i)-11(g)10(ht )-121(of)3( )-119(disapp)-8(rova)7(l )-121(for)-3( )-119(the )-117(c)4(a)4(st )-122(of)3( )-119(mi)-3(nd )-119(that)-9( )] TJ
ET
BT
1 0 0 1 70.944 431.33 Tm
[(thoug)8(ht )-101(up )-99(a)4(tom)-3(ic )-97(fusio)10(n )-99(a)4(nd )-99(fissi)-2(on )-99(a)4(s )-100(wa)-3(y)30(s )-100(of)-6( )-99(fir)4(st )-102(blowing)8( )-99(up )-99(a)4(nd )-99(lat)10(e)4(r )-96(he)4(a)4(ti)-3(n)-9(g)10( )-99(c)4(it)-3(ies )-98(a)4(s )] TJ
ET
BT
1 0 0 1 70.944 415.73 Tm
[(we)6(ll)-3( )-159(a)4(s )-160(the)-7( )-159(a)4(tt)-3(it)-3(ude)4(s )-160(wh)-7(ich )-157(led )-157(to )] TJ
ET
BT
1 0 0 1 244.85 415.73 Tm
[(stripm)-3(ini)-3(ng)10(,)] TJ
ET
BT
1 0 0 1 303.29 415.73 Tm
[( )] TJ
ET
BT
1 0 0 1 308.33 415.73 Tm
[(of)23(fshor)5(e)-5( )-159(oil)-3( )-159(we)6(ll)-3(s, )-160(K)-7(e)4(po)-9(ne)4(, )-159(food )-166(a)4(ddit)-3(ives, )] TJ
ET
BT
1 0 0 1 70.944 400.13 Tm
[(S)-3(S)-3(T)71(s, )] TJ
ET
BT
1 0 0 1 101.78 400.13 Tm
[(a)4(nd )-19(the )-17(ti)-3(n)-9(y)30( )-19(spher)6(i)-11(c)4(a)4(l )-21(pa)4(rticle)5(s )-20(of)3( )-19(plasti)-2(c)4( )-19(re)-2(c)4(e)4(ntl)-13(y)20( )-19(discov)-8(e)4(re)7(d )-19(c)4(lo)-11(gg)10(i)-11(ng)10( )-19(the )-17(wa)6(te)-7(rs )-17(of)3( )-19(the )] TJ
ET
BT
1 0 0 1 70.944 384.53 Tm
[(S)-3(a)4(r)13(g)10(a)4(sso S)-5(e)4(a)] TJ
ET
BT
1 0 0 1 133.58 384.53 Tm
[(.)] TJ
ET
BT
1 0 0 1 137.06 384.53 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 368.93 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 353.33 Tm
[(The)5( )-9(biom)-3(e)4(dica)6(l)-11( )-9(sc)3(ienc)-3(e)4(s )-20(a)4(re)7( )-9(no)-9(w )-17(c)4(a)4(u)-9(g)10(ht )-11(up)-9( )-9(with )-11(p)-9(h)-9(y)20(sica)5(l)-11( )-9(sc)3(ien)-7(c)4(e)4( )-19(a)4(nd )-9(te)-7(c)4(hn)-9(olog)-11(y)20( )-9(in )-11(th)-11(e)4( )-9(sa)3(me)-7( )] TJ
ET
BT
1 0 0 1 70.944 337.73 Tm
[(kind )-41(of)3( )-39(c)-5(ritica)5(l )-41(jud)-11(g)10(ment)-9(, )-39(with )-41(the )-37(s)-10(a)4(me )-37(p)-9(e)4(jora)5(ti)-3(v)-9(e)4( )] TJ
ET
BT
1 0 0 1 320.81 337.73 Tm
[(w)] TJ
ET
BT
1 0 0 1 329.47 337.73 Tm
[(or)3(d.)-9( )-39(H)] TJ
ET
BT
1 0 0 1 360.67 337.73 Tm
[(u)] TJ
ET
BT
1 0 0 1 366.67 337.73 Tm
[(br)3(is )-42(is)-12( )-39(re)7(spon)-10(sibl)-4(e)4(, )-39(it)-3( )-39(is )-42(sa)3(id, )-41(fo)-6(r )] TJ
ET
BT
1 0 0 1 70.944 322.13 Tm
[(the )-217(whole)4( )-219(biol)-3(og)10(ica)6(l )-221(r)-6(e)-5(volut)-3(ion. )-221(I)23(t )-221(is )-222(hubris )-219(that )-219(ha)4(s )-220(g)10(iven )-217(us )-220(the )-217(pr)3(o)-9(spec)7(ts )-222(of)3( )-219(be)4(ha)4(vior)-8( )] TJ
ET
BT
1 0 0 1 70.944 306.53 Tm
[(c)4(ontrol, )-80(ps)-10(y)20(c)4(ho)] TJ
ET
BT
1 0 0 1 145.82 306.53 Tm
[(sur)12(g)10(e)-5(r)-6(y)80(, )-79(f)-6(e)4(tal )-79(re)7(se)-6(a)4(rc)7(h, )-79(he)-5(a)4(rt )-78(tra)5(ns)-10(plants, )-80(the )-77(c)4(loni)-3(ng)10( )-79(of)3( )-79(pr)-6(omi)-3(ne)4(nt )-81(poli)-3(ti)-3(c)4(ians)11( )] TJ
ET
BT
1 0 0 1 70.944 290.93 Tm
[(fr)6(om )-131(bit)-3(s )-130(of)3( )-129(th)-11(e)4(ir )-128(own)-7( )-129(e)-5(mi)-3(ne)4(nt )] TJ
ET
BT
1 0 0 1 230.57 290.93 Tm
[(ti)-3(ssue, )-127(iatro)-6(g)10(e)4(nic)-7( )-129(disea)5(se)3(,)-9( )-129(ove)4(rpopu)3(lation,)-11( )-129(a)4(nd )-129(r)-6(e)4(c)4(ombi)-3(na)4(nt)-11( )] TJ
ET
BT
/F4 12 Tf
1 0 0 1 70.944 275.3 Tm
[<00270031>4<002400110003>-17<0037004B004C00560003>-21<004F004400560057000F0003>-20<0057004B00480003>-17<0051>-9<0048>4<005A0003>-27<005700480046>6<004B00510052004F0052004A>-11<005C>20<0003>-19<0057004B>-11<0044>4<00570003>-21<00530048>4<00550050004C00570056>-3<0003>-19<0057>-11<004B0048>4<0003>-19<00560057004C>-4<00570046004B004C0051004A>10<0003>-19<0052>-9<00490003>-16<005200510048>4<0003>-29<0046>4<0055>-6<0048>4<0044>4<00570058>-11<00550048>7<00B6>63<0056>-10<0003>-29<004A>10<0048>4<00510048>4<0056>-10<0003>-19<004C00510057>-3<00520003>-19<0057004B00480003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 259.7 Tm
[(DN)4(A)62( )-89(o)-9(f )-86(a)4(noth)-11(e)4(r)43(, )-89(to )-91(mak)-7(e)4( )-89(h)-19(y)20(br)3(ids, )-92(is )-92(c)4(u)-9(rr)6(e)4(ntl)-23(y)20( )-99(c)4(it)-3(e)4(d )-89(a)4(s )-90(the )-87(ult)-3(im)-3(a)4(te )-97(e)4(x)-9(a)4(mpl)-3(e)4( )-89(of)3( )-89(hubris. )-109(I)23(t)-11( )-89(is)-12( )] TJ
ET
BT
1 0 0 1 70.944 244.1 Tm
[(hubris f)] TJ
ET
BT
1 0 0 1 107.9 244.1 Tm
[(or)3( man to manuf)5(a)-5(c)4(ture)5( a)4( h)-19(y)20(b)-9(rid on his own)] TJ
ET
BT
1 0 0 1 315.17 244.1 Tm
[(.)] TJ
ET
BT
1 0 0 1 318.53 244.1 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 228.5 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 212.9 Tm
[(S)-3(o )-199(now )-197(we)6( )-209(a)4(r)] TJ
ET
BT
1 0 0 1 143.78 212.9 Tm
[(e)] TJ
ET
BT
1 0 0 1 149.18 212.9 Tm
[( )] TJ
ET
BT
/F4 12 Tf
1 0 0 1 154.58 212.9 Tm
[<00450044>-5<0046>4<004E0003>-199<00570052>-11<0003>-199<0057004B00480003>-197<0049004C0055>4<005600570003>-202<005A0052>-7<005500470003>-196<0044>-5<004A0044>4<004C0051000F0003>-201<0049>-6<0055005200500003>-198<00B3>4<004B>-19<005C>20<00450055>3<004C004700B4>-7<0003>-199<005700520003>-201<00B3>4<004B005800450055004C0056>-9<000F00B4>4<0003>-199<0044>4<005100470003>-199<0057004B>-11<0048>4<0003>-199<004B004C0047004700480051>-7<0003>] TJ
ET
BT
1 0 0 1 70.944 197.3 Tm
[<005000480044>6<0051004C0051004A0003>-171<00520049>3<0003>-169<0057005A00520003>-169<0045>-9<0048>4<004C0051>-11<004A>10<00560003>-180<004D0052004C>-3<00510048>4<00470003>-169<0058005100510044>4<0057005800550044>5<004F004F>-23<005C>20<0003>-169<00570052>-11<004A>10<0048>4<0057>-11<004B0048>4<00550003>-166<0045>-19<005C>20<0003>-169<005000440051>-7<0003>-169<004C00560003>-172<0056005200500048004B0052005A>-6<0003>-169<00550048>7<00570044004C00510048>4<00470011>-9<0003>-169<0037>71<005200470044>-15<005C>20<00B6>53<00560003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 181.7 Tm
[(joi)-3(ning)8( )-129(is )-132(stra)4(ig)8(ht )-131(out )-131(of)-6( )-129(Gr)5(e)4(e)4(k )-129(m)-21(y)20(thol)-3(o)] TJ
ET
BT
/F4 12 Tf
1 0 0 1 271.01 181.7 Tm
[<004A>-9<005C>30<001D0003>-131<004C0057>-3<0003>-129<004C00560003>-132<0057004B00480003>-127<0046>4<005200500045004C>-3<0051004C0051004A>8<0003>-129<00520049>3<0003>-129<005000440051>-7<00B6>63<0056>-10<0003>-129<0046>4<0044>4<00530044>4<0046>4<004C0057>-23<005C>20<0003>-129<005A004C0057004B0003>-131<0057004B0048>-7<0003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 166.1 Tm
[(spec)7(ial )-29(pr)3(e)-5(ro)-6(g)10(a)4(ti)-3(ve)4( )-29(o)-9(f )-26(the)-7( )-29(g)10(ods, )-40(a)4(nd )-29(it)-3( )-29(is )-32(re)7(a)4(ll)-23(y)20( )-29(in )-31(t)-11(his )-32(se)3(nse )-26(of)3( )-29(outra)-4(ge)4( )-29(that )-39(t)] TJ
ET
BT
1 0 0 1 439.27 166.1 Tm
[(h)] TJ
ET
BT
/F4 12 Tf
1 0 0 1 445.27 166.1 Tm
[<0048>4<0003>-29<005A00520055>5<00470003>-29<00B3>4<004B00580045>-9<0055004C005600B4>4<0003>] TJ
ET
BT
/F2 12 Tf
1 0 0 1 70.944 150.5 Tm
[(is )-222(be)4(ing)8( )-219(us)-10(e)4(d )-219(toda)-17(y)80(. )-219(Th)-8(a)4(t )-221(is )-222(wha)6(t )-221(the)-7( )-219(wor)5(d )-219(h)-9(a)-5(s )-220(g)10(r)-6(own )-217(int)-3(o, )-219(a)-5( )-219(wa)6(rnin)-8(g)10(, )-229(a)4( )-219(c)4(od)-9(e)4( )-219(wo)-7(rd, )-216(a)-5( )] TJ
ET
BT
1 0 0 1 70.944 134.9 Tm
[(shortha)4(nd )-119(sig)7(na)4(l )-121(f)-6(rom )] TJ
ET
BT
1 0 0 1 183.5 134.9 Tm
[(the )-117(lang)12(u)-9(a)-5(g)10(e)4( )-119(it)-3(se)3(lf: )-120(if )-118(man)-7( )-119(star)4(ts )-122(doing)8( )-119(thi)-3(ng)10(s )-120(r)-6(e)4(se)-6(rve)7(d )-119(for)6( )-119(the )-127(g)10(ods,)-10( )] TJ
ET
BT
1 0 0 1 70.944 119.3 Tm
[(de)4(if)-8(y)20(in)-11(g)10( )-39(him)-3(se)3(lf, )-38(t)] TJ
ET
BT
1 0 0 1 160.1 119.3 Tm
[(h)] TJ
ET
BT
1 0 0 1 166.22 119.3 Tm
[(e)4( )-39(out)-11(c)4(ome )-37(will)-3( )-39(be)4( )-39(somethi)-2(n)-9(g)10( )-49(wor)5(se)3( )-39(fo)-6(r )-36(him)-3(, )-39(s)-10(y)20(mbol)-3(ic)-7(a)4(ll)-13(y)90(,)-9( )-39(than )-37(the )-37(li)-3(tt)-3(e)4(rs )] TJ
ET
BT
1 0 0 1 70.944 103.68 Tm
[(of)3( wild boar)5(s and)3( domest)-10(ic sow)3(s we)5(r)-6(e)4( f)3(or)3( the )-7(a)4(nc)4(i)-11(e)4(nt R)-4(omans)] TJ
ET
BT
1 0 0 1 368.95 103.68 Tm
[(.)] TJ
ET
BT
1 0 0 1 372.31 103.68 Tm
[( )] TJ
ET
BT
1 0 0 1 70.944 88.08 Tm
[( )] TJ
ET
从以上的字节数组中可以看到:大多数文本都在“BT”和“ET”之间,并且位于其中的“[”和“]”之间,再位于其中的“(”和“)”之间。所以就产生了以上的C#代码。只有紧跟在“/F4 12 Tf”标记之后、用“<”和“>”括起来的十六进制字符串读不出来,所以还要进一步研究。比如:
BT
/F4 12 Tf
1 0 0 1 70.944 571.75 Tm
[<0037004B0048>5<0003>-9<0046>4<005200470048>4<0003>-9<005A0052>-7<005500470003>-6<004900520055>6<0003>-19<0046>4<0055004C0057004C0046004C00560050>-3<0003>-9<00520049>3<0003>-9<00560046>3<004C004800510046>6<0048>4<0003>-9<0044>4<005100470003>-9<0056>-10<0046>4<004C004800510057004C0056>-2<005700560003>-12<0057004B004800560048>5<0003>-9<00470044>-5<005C>20<00560003>-10<004C00560003>-12<00B3>4<004B00580045>-9<0055004C0056001100B4>4<0003>-9<003200510046>6<0048>4<0003>] TJ
ET
以上的字节数组肯定代表一些文本,但这些文本不符合以上规律,所以以上的程序无法读出。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息