您的位置:首页 > 编程语言 > C#

PDF 文件结构初探 开源iTextSharp(C#)  PDFLib中Block结构分析

2010-12-23 10:39 756 查看
PDFLib 中有一个Block功能,类似于模板,可以在pdf上添加文本,图像,PDF三种类型的模板,最后在运行时通过FillBlock功能填充模板动态生成PDF,同时Block支持多种属性定义,但是在PDF中添加Block只能依赖于Adobe Acrobat Pro + PDFlib-Block-Plugin.查了PDFLib的官方文档,除了介绍到可以利用 pdfmarks 生成 PDFlib 块(具体怎么使用还不太清楚)外没有别的办法,特别是想通过c#等编程方式对Block进行操作,好像没有相应的接口(PDFLib可以读出Block,但不提供Block的增删改).

先查阅了PDF文档格式的基础资料,详细请参考 : 一个简单的PDF文件结构的分析

结果是:要使用编程方式实现PDFLib中Block的操作必须对PDF文件结构进行操作,于是找到iTextSharp,c#版本的.
接着用记事本打开带有Block的PDF文档,发现类似的定义:
6 0 obj

<</ID 0/Name/Block_0/Rect[189.35 964.92 340.72 1007.16]/Subtype/Text/Type/Block/defaulttext(111)/encoding(cp936)/fitmethod/auto/fontname(Helvetica)/fontsize 12>>

endobj

5 0 obj

<</LastModified(D:20101207171225)/Private<</Blocks<</Block_0 6 0 R>>/PluginVersion(3.4)/Version 7.0>>>>

endobj

1 0 obj

<</Contents 2 0 R/MediaBox[0 0 612.72 1026.72]/PieceInfo<</PDFlib 5 0 R>>/Resources 7 0 R/Type/Page/Parent 47 0 R>>

endobj

47 0 obj

<</Type/Pages/Count 1/Kids[1 0 R]/ITXT(5.0.5)>>

endobj

48 0 obj

<</Type/Catalog/Pages 47 0 R>>

endobj

分析后结果:
PDF Block在PDF文件中是按页定义的,同一页内的Block Name不能相同.
上面的数据中48#对象就是文档目录(/Type/Catalog)的定义,其中的/Pages 47 0 R表示引用47#对象,47#对象定义了总页数为1(/Count 1),第一页对象是1#(/Kids[1 0 R]). 再向下找1#对象,其中的/PieceInfo<</PDFlib 5 0 R>>是PDF Block所增加的,正常的PDF是没有这个属性的,它表示这一页的Block对象定义引用了5#对象,再看5#对象,它定义了Block的版本,时间等,其中的/Private<</Blocks<</Block_0 6 0 R>>...>>是Block列表的定义,这里只有一个Block,引用了6#对象.最后到了具体的Block了,6#对象是Block的定义,包括名称等所有属性.

有了以上的分析结果,再加上开源的iTextSharp,对PDF Block的功能增加就容易多了.
为了更好的了解PDF的文件结构,用iTextSharp还作了一个PDF文件的Parse功能:用树型结构的方式展示PDF的定义,效果见下图:



相关代码:

//PdfBlock.cs

namespace iTextSharp.text.pdf
{

/// <summary>
/// Constants shared by the block-handling classes: integer block type codes and the
/// <c>PdfName</c> keys/values used in the PDFlib block dictionaries.
/// </summary>
public static class PdfBlockConsts
{
// Block type codes. PdfBlock.typeNames is indexed by these values and lists the
// Text / Image / PDF subtype names in this same order.
public const int TYPE_TXT = 0;
public const int TYPE_IMG = 1;
public const int TYPE_PDF = 2;

// PDF name constants.
// "Block": written as the /Type value of every block dictionary.
public static readonly PdfName PDF_BLOCK = new PdfName("Block");

// "PluginVersion": key written next to /Blocks and /Version inside the /Private dictionary.
public static readonly PdfName BLOCK_PLUGIN_VER = new PdfName("PluginVersion");

// "Custom": key under which a block's custom-property dictionary is stored.
public static readonly PdfName BLOCK_CUSTOM = new PdfName("Custom");
// "LastModified": timestamp key of the per-page PDFlib dictionary.
// NOTE: the identifier is misspelled ("LSAT" for "LAST"); it is left as-is because other code refers to it.
public static readonly PdfName BLOCK_LSATDATE = new PdfName("LastModified");
// "Blocks": key of the dictionary that maps block names to block objects.
public static readonly PdfName BLOCK_BLOCKLIST = new PdfName("Blocks");

// "PDFlib": key inside the page's /PieceInfo dictionary that points at the PDFlib data.
public static readonly PdfName BLOCK_PDFLIB = new PdfName("PDFlib");
// "PieceInfo": page dictionary key that holds the /PDFlib entry.
public static readonly PdfName BLOCK_PIECE_INFO = new PdfName("PieceInfo");

// /Subtype values for the three block kinds (see the TYPE_* codes above).
public static readonly PdfName BLOCK_TEXT = new PdfName("Text");
public static readonly PdfName BLOCK_IMG = new PdfName("Image");
public static readonly PdfName BLOCK_PDF = new PdfName("PDF");

// The remaining constants were elided in the original listing.
.........................

}

/// <summary>
/// PDF dictionary that describes one PDFlib block (<c>/Type /Block</c>).
/// Wraps a private copy of the caller's <see cref="Block"/> and remembers the indirect
/// reference under which the dictionary was written, once it has been written.
/// </summary>
internal class PdfBlock : PdfDictionary
{
    // /Subtype names indexed by block type code
    // (PdfBlockConsts.TYPE_TXT, TYPE_IMG, TYPE_PDF).
    public static PdfName[] typeNames = { PdfBlockConsts.BLOCK_TEXT, PdfBlockConsts.BLOCK_IMG, PdfBlockConsts.BLOCK_PDF };

    // Copy of the block description this dictionary was built from.
    internal Block block;

    // Indirect reference of this dictionary; null until the owner writes it to the PDF body.
    internal PdfIndirectReference blockRef { get; set; }

    /// <summary>
    /// Builds the dictionary for a single block: /ID, /Name, /Rect, /Subtype and /Type.
    /// </summary>
    /// <param name="writer">Owning writer; supplies the next block id and the current page number.</param>
    /// <param name="tmpblock">Block description; its type, name and rectangle are copied.</param>
    /// <param name="Init">Not used by this constructor.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="writer"/> or <paramref name="tmpblock"/> is null.</exception>
    /// <exception cref="System.ArgumentOutOfRangeException">The block type has no entry in <see cref="typeNames"/>.</exception>
    internal PdfBlock(PdfWriterEx writer, Block tmpblock, bool Init)
        : base(PdfBlockConsts.PDF_BLOCK)
    {
        if (writer == null)
        {
            throw new System.ArgumentNullException("writer");
        }
        if (tmpblock == null)
        {
            throw new System.ArgumentNullException("tmpblock");
        }

        // Keep the block type in a local. The original assigned it to `type`, which is not
        // declared in this class and therefore resolves to the inherited PdfObject field that
        // stores the PDF object kind -- overwriting it re-tags this dictionary as another kind.
        int blockType = tmpblock.BlockType;
        if (blockType < 0 || blockType >= typeNames.Length)
        {
            throw new System.ArgumentOutOfRangeException("tmpblock", "Unknown block type: " + blockType);
        }

        // NOTE(review): only type, name and rectangle are copied; any entries in
        // tmpblock.attributes / tmpblock.customs are not carried over -- confirm this is intended.
        block = new Block(blockType, tmpblock.Name, tmpblock.Rect);
        block.BlockID = writer.pdfblocks.Count;
        block.Page = writer.Writer.CurrentPageNumber;

        Put(PdfName.ID, new PdfNumber(block.BlockID));
        Put(PdfName.NAME, new PdfName(block.Name));

        Put(PdfName.RECT, new PdfRectangle(block.Rect));
        Put(PdfName.SUBTYPE, typeNames[blockType]);
        Put(PdfName.TYPE, PdfBlockConsts.PDF_BLOCK);
    }

    /// <summary>
    /// Creates a block dictionary from loose values.
    /// </summary>
    /// <param name="writer">Owning writer.</param>
    /// <param name="BlockType">Block type code (index into <see cref="typeNames"/>).</param>
    /// <param name="Page">Ignored: the constructor always uses the writer's current page number.</param>
    /// <param name="Name">Block name.</param>
    /// <param name="Rect">Block rectangle.</param>
    public static PdfBlock GetInstance(PdfWriterEx writer, int BlockType, int Page, string Name, Rectangle Rect)
    {
        return GetInstance(writer, new Block(BlockType, Name, Rect));
    }

    /// <summary>
    /// Creates a block dictionary from an existing <see cref="Block"/> description.
    /// </summary>
    public static PdfBlock GetInstance(PdfWriterEx writer, Block tmpblock)
    {
        return new PdfBlock(writer, tmpblock, true);
    }
}

//Block.cs

namespace iTextSharp.text.pdf
{
/// <summary>
/// Plain description of one block: its type code, name, rectangle, id and page number,
/// plus two initially empty dictionaries for attributes and custom properties.
/// </summary>
[Serializable]
public class Block
{
    // Both dictionaries are created together with the instance and start out empty.
    internal Dictionary<string, PdfObject> attributes = new Dictionary<string, PdfObject>();
    internal Dictionary<string, PdfObject> customs = new Dictionary<string, PdfObject>();

    public int BlockID { get; set; }
    public int BlockType { get; set; }
    public string Name { get; set; }
    public int Page { get; set; }
    public Rectangle Rect { get; set; }

    /// <summary>
    /// Creates a block with the given type code, name and rectangle.
    /// BlockID and Page keep their default values.
    /// </summary>
    /// <param name="type">Block type code.</param>
    /// <param name="name">Block name.</param>
    /// <param name="rect">Block rectangle.</param>
    public Block(int type, string name, Rectangle rect)
    {
        this.BlockType = type;
        this.Name = name;
        this.Rect = rect;
    }
}

//PdfWriterEx.cs

namespace iTextSharp.text.pdf
{
/// <summary>
/// Container for the blocks of one page together with some page-level metadata.
/// </summary>
public class PageBlock
{
    /// <summary>
    /// Creates an instance whose <see cref="Blocks"/> list is empty; all other
    /// properties keep their default values.
    /// </summary>
    public PageBlock()
    {
        this.Blocks = new List<Block>();
    }

    // Page this instance refers to (presumably 1-based, like PdfWriter page numbers -- TODO confirm).
    public int PageNumber { get; set; }

    // Blocks collected for the page.
    public List<Block> Blocks { get; set; }

    // Presumably mirrors the /LastModified, /Version and /PluginVersion entries
    // of the page's PDFlib dictionary -- verify against the code that fills them.
    public DateTime ModifyDate { get; set; }
    public string Version { get; set; }
    public string PluginVersion { get; set; }
}

/// <summary>
/// Wraps a <see cref="PdfWriter"/> and keeps track of the PDFlib blocks added while the
/// document is being written, so that each page can be given a
/// <c>/PieceInfo &lt;&lt; /PDFlib ... &gt;&gt;</c> entry describing its blocks.
/// </summary>
internal class PdfWriterEx
{
    // Values written as /Version and /PluginVersion of the per-page /Private dictionary.
    internal static readonly string BlockVersion = "7.0";
    internal static readonly string BlockPluginVersion = "3.4";

    // The wrapped iTextSharp writer.
    public PdfWriter Writer { get; set; }

    /// <summary>
    /// Wraps an existing writer.
    /// </summary>
    /// <param name="writer">Writer that receives the block objects.</param>
    public PdfWriterEx(PdfWriter writer)
    {
        Writer = writer;
        // KeepBlock / WriteBlock are not declared in the visible part of this class.
        KeepBlock = false;
        WriteBlock = true;
    }

    // Every block added so far, for all pages.
    internal List<PdfBlock> pdfblocks = new List<PdfBlock>();

    /// <summary>
    /// Creates a wrapper around <c>PdfWriter.GetInstance(document, os)</c>.
    /// </summary>
    public static PdfWriterEx GetInstance(Document document, Stream os)
    {
        return new PdfWriterEx(PdfWriter.GetInstance(document, os));
    }

    /// <summary>
    /// Creates a wrapper around <c>PdfWriter.GetInstance(document, os, listener)</c>.
    /// </summary>
    public static PdfWriterEx GetInstance(Document document, Stream os, IDocListener listener)
    {
        return new PdfWriterEx(PdfWriter.GetInstance(document, os, listener));
    }

    /// <summary>
    /// Adds a /PieceInfo entry to <paramref name="page"/> when blocks were registered for the
    /// writer's current page; does nothing otherwise.
    /// </summary>
    /// <param name="page">Page dictionary that receives the entry.</param>
    protected void AddBlockToPage(PdfDictionary page)
    {
        // AddPageBlocks returns null when the current page has no blocks, so the block
        // list only needs to be filtered once.
        PdfIndirectReference pdflibRef = AddPageBlocks(Writer.CurrentPageNumber);
        if (pdflibRef == null)
        {
            return;
        }

        PdfDictionary piece = new PdfDictionary();
        piece.Put(PdfBlockConsts.BLOCK_PDFLIB, pdflibRef);
        page.Put(PdfBlockConsts.BLOCK_PIECE_INFO, piece);
    }

    /// <summary>
    /// Removes the PDFlib block data from a page dictionary.
    /// </summary>
    /// <remarks>
    /// Missing intermediate dictionaries are tolerated. The /PieceInfo dictionary itself is
    /// removed only when nothing else is left in it, so entries stored there under other
    /// keys are preserved.
    /// </remarks>
    /// <param name="page">Page dictionary to clean.</param>
    internal static void RemoveBlocks(PdfDictionary page)
    {
        PdfDictionary piece = page.GetAsDict(PdfBlockConsts.BLOCK_PIECE_INFO);
        if (piece == null)
        {
            return;
        }

        PdfDictionary pageinfo = piece.GetAsDict(PdfBlockConsts.BLOCK_PDFLIB);
        if (pageinfo != null)
        {
            PdfDictionary privateDict = pageinfo.GetAsDict(PdfName.PRIVATE);
            if (privateDict != null)
            {
                PdfDictionary blocks = privateDict.GetAsDict(PdfBlockConsts.BLOCK_BLOCKLIST);
                if (blocks != null)
                {
                    // Snapshot the keys first: entries are removed from `blocks` below and a
                    // collection must not be modified while it is being enumerated.
                    List<PdfName> blockNames = new List<PdfName>();
                    foreach (PdfName blockName in blocks.Keys)
                    {
                        blockNames.Add(blockName);
                    }

                    foreach (PdfName blockName in blockNames)
                    {
                        PdfDictionary block = blocks.GetDirectObject(blockName) as PdfDictionary;
                        PdfName blockKind = (block == null) ? null : block.GetAsName(PdfName.TYPE);
                        if (blockKind != null && blockKind.Equals(PdfBlockConsts.PDF_BLOCK))
                        {
                            // Remove the entry from the /Blocks dictionary (the original called
                            // Remove on the block itself, which removed nothing).
                            blocks.Remove(blockName);
                        }
                    }
                }
                privateDict.Remove(PdfBlockConsts.BLOCK_BLOCKLIST);
                pageinfo.Remove(PdfName.PRIVATE);
            }
            piece.Remove(PdfBlockConsts.BLOCK_PDFLIB);
        }

        if (piece.Keys.Count == 0)
        {
            page.Remove(PdfBlockConsts.BLOCK_PIECE_INFO);
        }
    }

    /// <summary>
    /// Registers a block on the writer's current page.
    /// </summary>
    /// <param name="block">Block description; must not be null.</param>
    /// <returns>The dictionary created for the block.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="block"/> is null.</exception>
    /// <exception cref="IOException">A block with the same name already exists on the current page.</exception>
    public PdfBlock AddBlock(Block block)
    {
        if (block == null)
        {
            throw new System.ArgumentNullException("block");
        }
        if (haveBlockOnPage(block.Name))
        {
            throw new IOException(MessageLocalization.GetComposedMessage("the.document.has.block.page"));
        }
        return AddBlock(PdfBlock.GetInstance(this, block));
    }

    /// <summary>
    /// Registers a block on the writer's current page from loose values.
    /// </summary>
    /// <param name="BlockType">Block type code.</param>
    /// <param name="Name">Block name; must be unique on the page (compared ignoring case).</param>
    /// <param name="Rect">Block rectangle.</param>
    /// <returns>The dictionary created for the block.</returns>
    /// <exception cref="IOException">A block with the same name already exists on the current page.</exception>
    public PdfBlock AddBlock(int BlockType, string Name, Rectangle Rect)
    {
        if (haveBlockOnPage(Name))
        {
            throw new IOException(MessageLocalization.GetComposedMessage("the.document.has.block.page"));
        }
        return AddBlock(PdfBlock.GetInstance(this, BlockType, Writer.CurrentPageNumber, Name, Rect));
    }

    /// <summary>
    /// Appends an already built block dictionary to the internal list and returns it.
    /// </summary>
    protected PdfBlock AddBlock(PdfBlock pdfblock)
    {
        pdfblocks.Add(pdfblock);
        return pdfblock;
    }

    /// <summary>
    /// Tells whether a block with the given name is already registered for the writer's
    /// current page. Names are compared ignoring case, using an ordinal comparison so the
    /// result does not depend on the current culture.
    /// </summary>
    protected bool haveBlockOnPage(string Name)
    {
        return pdfblocks.Exists(pdfBlock =>
            pdfBlock.block.Page == Writer.CurrentPageNumber
            && string.Equals(pdfBlock.block.Name, Name, System.StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Writes the PDFlib dictionary for one page to the PDF body.
    /// </summary>
    /// <remarks>
    /// The written dictionary has the shape
    /// <c>&lt;&lt; /LastModified ... /Private &lt;&lt; /Blocks &lt;&lt; name ref ... &gt;&gt; /PluginVersion ... /Version ... &gt;&gt; &gt;&gt;</c>.
    /// Blocks that have not been written yet are written first.
    /// </remarks>
    /// <param name="iPage">Page number whose blocks are collected.</param>
    /// <returns>Reference to the written dictionary, or null when the page has no blocks.</returns>
    protected PdfIndirectReference AddPageBlocks(int iPage)
    {
        List<PdfBlock> pageBlocks = pdfblocks.FindAll(pdfBlock => pdfBlock.block.Page == iPage);
        if (pageBlocks.Count == 0)
        {
            return null;
        }

        PdfDictionary blockDict = new PdfDictionary();
        foreach (PdfBlock pdfBlock in pageBlocks)
        {
            // buildBlockRef returns immediately for blocks that already have a reference.
            buildBlockRef(pdfBlock);
            blockDict.Put(new PdfName(pdfBlock.block.Name), pdfBlock.blockRef);
        }

        PdfDictionary pdflibDict = new PdfDictionary();
        pdflibDict.Put(PdfBlockConsts.BLOCK_LSATDATE, new PdfDate());

        PdfDictionary privateDict = new PdfDictionary();
        privateDict.Put(PdfBlockConsts.BLOCK_BLOCKLIST, blockDict);
        privateDict.Put(PdfBlockConsts.BLOCK_PLUGIN_VER, new PdfString(BlockPluginVersion));
        privateDict.Put(PdfName.VERSION, new PdfNumber(BlockVersion));

        pdflibDict.Put(PdfName.PRIVATE, privateDict);

        return Writer.AddToBody(pdflibDict).IndirectReference;
    }

    /// <summary>
    /// Completes a block dictionary with its custom properties (under /Custom) and plain
    /// attributes, writes it to the PDF body and stores the resulting reference in
    /// <c>blockRef</c>. Entries with a null value are skipped. Does nothing when the block
    /// already has a reference.
    /// </summary>
    protected void buildBlockRef(PdfBlock pdfBlock)
    {
        if (pdfBlock.blockRef != null)
        {
            return;
        }

        Block source = pdfBlock.block;

        if (source.customs.Count > 0)
        {
            PdfDictionary customDict = new PdfDictionary();
            foreach (KeyValuePair<string, PdfObject> custom in source.customs)
            {
                if (custom.Value != null)
                {
                    customDict.Put(new PdfName(custom.Key), custom.Value);
                }
            }
            // As in the original, /Custom is written even when every value was null.
            pdfBlock.Put(PdfBlockConsts.BLOCK_CUSTOM, customDict);
        }

        foreach (KeyValuePair<string, PdfObject> attribute in source.attributes)
        {
            if (attribute.Value != null)
            {
                pdfBlock.Put(new PdfName(attribute.Key), attribute.Value);
            }
        }

        pdfBlock.blockRef = Writer.AddToBody(pdfBlock).IndirectReference;
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: