您的位置:首页 > 大数据

[C#]_[使用微软OpenXmlSDK (OpenXmlReader)读取xlsx表格] 读取大数据量100万条数据Excel文件解决方案

2018-07-18 02:17 459 查看
 

1.OpenXmlSDK是个很好的类库,可惜只能通过C#(等.NET语言)调用,C#的童鞋有福气了。

2.服务端程序由于没法安装Office,所以这个对ASP.NET网站来说是最理想的库了。需要.NET 4.0及以上版本。

3.以流的形式,按SAX模型读取大文件。

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

namespace ConsoleApplication1
{
/// <summary>
/// Plain data holder for one product row read from the spreadsheet.
/// All values are kept as the raw cell text; a field is null when no value was read.
/// </summary>
class ProductObject
{

    public String xinghao;    // model
    public String changjia;   // manufacturer
    public String pihao;      // batch number
    public String fengzhuang; // package
    public String shuliang;   // quantity

    /// <summary>
    /// Resets every field to null, i.e. the state of a freshly constructed instance.
    /// The reader reuses a single instance and calls this before filling in each row,
    /// so without the reset a value from an earlier row could leak into a later one.
    /// </summary>
    public void init()
    {
        xinghao = null;
        changjia = null;
        pihao = null;
        fengzhuang = null;
        shuliang = null;
    }
}

/// <summary>
/// Console entry point that streams the rows of one worksheet of an .xlsx file
/// through <see cref="OpenXmlReader"/> and hands each row to <see cref="CallSqlInsert"/>.
/// </summary>
class Program
{
    // Tab name of the only worksheet that is imported; all other sheets are ignored.
    private const string TargetSheetName = "Sheet1";

    // File that is read when no path is supplied on the command line.
    private const string DefaultFileName = @"E:\software\TestData\xlsx\test.xlsx";

    // Number of leading columns (A..E) that map onto ProductObject fields.
    private const int ProductColumnCount = 5;

    // Shared-string texts of the workbook they were loaded from, indexed by shared-string id.
    private WorkbookPart sharedStringOwner;
    private List<string> sharedStrings;

    /// <summary>
    /// Reads the workbook given as the first command-line argument, or the built-in
    /// default path when no argument is supplied.
    /// </summary>
    /// <param name="args">Command-line arguments; args[0], if present, is the .xlsx path.</param>
    static void Main(string[] args)
    {
        String fileName = (args != null && args.Length > 0) ? args[0] : DefaultFileName;
        Program pro = new Program();
        pro.ReadAllCellValues(fileName);
    }

    /// <summary>
    /// Placeholder for the database insert: currently only prints the five fields,
    /// colon-separated, on one console line.
    /// </summary>
    /// <param name="po">The row to persist. Individual fields may be null.</param>
    private void CallSqlInsert(ProductObject po)
    {
        // Call the business-layer method that inserts the row into the database here.
        // Take care to handle null (empty-cell) values.
        Console.Out.Write("{0}:{1}:{2}:{3}:{4} ", po.xinghao, po.changjia, po.pihao, po.fengzhuang, po.shuliang);
        Console.Out.WriteLine();
    }

    /// <summary>
    /// Returns the text of a cell, resolving shared-string and inline-string cells.
    /// </summary>
    /// <param name="workbookPart">Workbook that owns the shared-string table.</param>
    /// <param name="c">The cell to read.</param>
    /// <returns>
    /// The cell text, or null when the cell is null or carries no value.
    /// Non-string cells return the raw stored text.
    /// </returns>
    /// <exception cref="FormatException">A shared-string cell holds a non-integer index.</exception>
    /// <exception cref="ArgumentOutOfRangeException">A shared-string index is outside the table.</exception>
    private String GetCellValue(WorkbookPart workbookPart, Cell c)
    {
        if (c == null)
        {
            return null;
        }

        bool hasType = c.DataType != null;

        // Inline strings keep their text in an <is> child instead of <v>.
        if (hasType && c.DataType.Value == CellValues.InlineString)
        {
            return c.InlineString != null ? GetRichText(c.InlineString) : null;
        }

        // Styled-but-empty cells have no <v> element at all.
        if (c.CellValue == null)
        {
            return null;
        }

        string raw = c.CellValue.InnerText;
        if (hasType && c.DataType.Value == CellValues.SharedString)
        {
            int index = int.Parse(raw, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture);
            return GetSharedStrings(workbookPart)[index];
        }

        return raw;
    }

    /// <summary>
    /// Returns the shared-string texts of <paramref name="workbookPart"/>, loading them once
    /// and caching the list so that each cell lookup is an O(1) index operation.
    /// </summary>
    private List<string> GetSharedStrings(WorkbookPart workbookPart)
    {
        if (sharedStrings != null && ReferenceEquals(sharedStringOwner, workbookPart))
        {
            return sharedStrings;
        }

        List<string> loaded = new List<string>();
        SharedStringTablePart tablePart = workbookPart.SharedStringTablePart;
        if (tablePart != null)
        {
            // Stream the table so that only one item is materialised at a time.
            using (OpenXmlReader reader = OpenXmlReader.Create(tablePart))
            {
                while (reader.Read())
                {
                    if (reader.ElementType == typeof(SharedStringItem) && reader.IsStartElement)
                    {
                        loaded.Add(GetRichText(reader.LoadCurrentElement()));
                    }
                }
            }
        }

        sharedStringOwner = workbookPart;
        sharedStrings = loaded;
        return loaded;
    }

    /// <summary>
    /// Extracts the display text of a shared-string item or inline string: the plain
    /// text child when present, otherwise the concatenated text of its rich-text runs.
    /// </summary>
    private static string GetRichText(OpenXmlElement item)
    {
        DocumentFormat.OpenXml.Spreadsheet.Text plain = item.GetFirstChild<DocumentFormat.OpenXml.Spreadsheet.Text>();
        if (plain != null)
        {
            return plain.Text;
        }

        StringBuilder builder = new StringBuilder();
        foreach (Run run in item.Elements<Run>())
        {
            if (run.Text != null)
            {
                builder.Append(run.Text.Text);
            }
        }
        return builder.ToString();
    }

    /// <summary>
    /// Converts the column letters of a cell reference such as "C7" into a zero-based
    /// column index ("A" is 0).
    /// </summary>
    /// <param name="cell">The cell whose reference is inspected.</param>
    /// <param name="fallback">Index to use when the cell carries no usable reference.</param>
    private static int GetColumnIndex(Cell cell, int fallback)
    {
        string reference = cell.CellReference;
        if (string.IsNullOrEmpty(reference))
        {
            return fallback;
        }

        int column = 0;
        int letters = 0;
        while (letters < reference.Length)
        {
            char ch = char.ToUpperInvariant(reference[letters]);
            if (ch < 'A' || ch > 'Z')
            {
                break;
            }
            column = column * 26 + (ch - 'A' + 1);
            letters++;
        }

        return letters == 0 ? fallback : column - 1;
    }

    /// <summary>
    /// Fills <paramref name="po"/> from the row the reader is positioned on.
    /// Columns A..E map to xinghao, changjia, pihao, fengzhuang and shuliang.
    /// Cells are placed by their cell reference, because empty cells are omitted from
    /// the sheet XML and a purely positional mapping would shift values into the
    /// wrong fields. Fields without a cell are left null.
    /// </summary>
    /// <param name="reader">Reader positioned on the start of a row element.</param>
    /// <param name="workbookPart">Workbook that owns the shared-string table.</param>
    /// <param name="po">Object that receives the values; it is cleared first.</param>
    /// <returns>true when the row contains at least one cell in columns A..E; otherwise false.</returns>
    private Boolean StoreProductObject(OpenXmlReader reader, WorkbookPart workbookPart, ProductObject po)
    {
        // The caller reuses one instance for all rows, so clear it before filling.
        po.xinghao = null;
        po.changjia = null;
        po.pihao = null;
        po.fengzhuang = null;
        po.shuliang = null;

        if (!reader.ReadFirstChild())
        {
            return false; // the row has no children
        }

        bool stored = false;
        int nextColumn = 0;
        do
        {
            if (reader.ElementType != typeof(Cell))
            {
                continue;
            }

            Cell cell = (Cell)reader.LoadCurrentElement();
            int column = GetColumnIndex(cell, nextColumn);
            nextColumn = column + 1;
            if (column >= ProductColumnCount)
            {
                // Nothing beyond column E is needed; the caller's Read() loop walks past the rest.
                break;
            }

            string value = GetCellValue(workbookPart, cell);
            switch (column)
            {
                case 0: po.xinghao = value; break;
                case 1: po.changjia = value; break;
                case 2: po.pihao = value; break;
                case 3: po.fengzhuang = value; break;
                case 4: po.shuliang = value; break;
            }
            stored = true;
        }
        while (reader.ReadNextSibling());

        return stored;
    }

    /// <summary>
    /// Looks up the worksheet part whose tab name equals <paramref name="sheetName"/>.
    /// </summary>
    /// <returns>The matching worksheet part, or null when there is no such worksheet.</returns>
    private static WorksheetPart FindWorksheetPart(WorkbookPart workbookPart, string sheetName)
    {
        if (workbookPart.Workbook == null)
        {
            return null;
        }

        foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
        {
            string name = sheet.Name;
            string relationshipId = sheet.Id;
            if (relationshipId != null && string.Equals(name, sheetName, StringComparison.Ordinal))
            {
                // A chart sheet or similar yields null here.
                return workbookPart.GetPartById(relationshipId) as WorksheetPart;
            }
        }
        return null;
    }

    // SAX-style streaming keeps memory use flat even for very large .xlsx files.
    // Whether to commit row by row or in batches (for example every 100 rows) is up to the caller.
    /// <summary>
    /// Streams every row of the worksheet named "Sheet1" in <paramref name="fileName"/>
    /// and passes each row that has data in columns A..E to <see cref="CallSqlInsert"/>.
    /// The sheet is selected by its tab name from the workbook's sheet list; workbooks
    /// without such a sheet produce no output.
    /// </summary>
    /// <param name="fileName">Path of the .xlsx file; it is opened read-only.</param>
    private void ReadAllCellValues(string fileName)
    {
        using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
        {
            WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
            if (workbookPart == null)
            {
                return;
            }

            WorksheetPart worksheetPart = FindWorksheetPart(workbookPart, TargetSheetName);
            if (worksheetPart == null)
            {
                return;
            }

            ProductObject po = new ProductObject();
            using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
            {
                while (reader.Read())
                {
                    // React to the start tag only; the reader also reports the row's end tag.
                    if (reader.ElementType == typeof(Row) && reader.IsStartElement)
                    {
                        po.init();
                        if (StoreProductObject(reader, workbookPart, po))
                        {
                            CallSqlInsert(po);
                        }
                    }
                }
            }
        }
    }

}
}

 

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: