您的位置:首页 > 数据库

SQL Server 2008中如何为XML字段建立索引

2011-09-14 09:08 381 查看
SQL Server中的XML索引分为两类:主XML索引和辅助XML索引。其中辅助XML索引又分为三种:PATH辅助XML索引、VALUE辅助XML索引、PROPERTY辅助XML索引。

创建XML索引的语法示例如下:

view plaincopy to clipboardprint?

-- Demo table: an integer key plus the XML column that the indexes below are built on.
-- The primary key matters: SQL Server only allows XML indexes on a table whose
-- primary key has a clustered index (the default for "primary key").
create table XMLTable(Id int primary key, XMLCol xml);
go

-- Primary XML index. It has to exist before any secondary XML index can be created,
-- because each secondary index is built on top of it ("using xml index ...").
create primary xml index IPXML_XMLTable_XMLCol on XMLTable(XMLCol);

-- Secondary XML index of type PATH.
create xml index IXML_XMLTable_XMLCol_Path on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for path;

-- Secondary XML index of type PROPERTY.
create xml index IXML_XMLTable_XMLCol_Property on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for property;

-- Secondary XML index of type VALUE.
create xml index IXML_XMLTable_XMLCol_value on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for value;

下面的C#程序用于向XMLTable表中批量插入60万条测试数据:

/// <summary>
/// Generates the test data set in memory and bulk-loads it: 600000 rows, each with a
/// sequential Id (starting at 1) and an XML document produced by GetPropertyXML.
/// Prints "finished" afterwards and waits for a line of console input before exiting.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Number of rows to generate; lower this to shorten the load and the later index build.
    const int totalRows = 600000;

    DataTable table = GetTableSchema();

    int id = 1;
    while (id <= totalRows)
    {
        // Values are given in column order: Id first, then the XML text.
        table.Rows.Add(id, GetPropertyXML());
        id++;
    }

    BulkToDB(table);

    Console.WriteLine("finished");
    Console.ReadLine();
}

/// <summary>
/// Bulk-inserts every row of <paramref name="dt"/> into the "XMLTable" table, using the
/// connection string named "ConnStr1" from the application configuration.
/// </summary>
/// <param name="dt">
/// Rows to insert. No column mappings are configured, so SqlBulkCopy matches source and
/// destination columns by ordinal position. A null or empty table is a no-op.
/// </param>
/// <remarks>
/// Exceptions raised while opening the connection or copying the rows propagate to the
/// caller unchanged, with their original stack trace.
/// </remarks>
public static void BulkToDB(DataTable dt)
{
    // Check before touching dt.Rows: the guard used to come after dt.Rows.Count had
    // already been read, so a null table crashed instead of being skipped.
    if (dt == null || dt.Rows.Count == 0)
    {
        return;
    }

    string connectionString =
        ConfigurationManager.ConnectionStrings["ConnStr1"].ConnectionString;

    // "using" closes the connection and releases the bulk-copy object on every exit
    // path, which replaces the manual try/finally and the stack-trace-resetting "throw ex".
    using (SqlConnection sqlConn = new SqlConnection(connectionString))
    using (SqlBulkCopy sqlBulkCopy = new SqlBulkCopy(sqlConn))
    {
        sqlBulkCopy.BulkCopyTimeout = 0;          // 0 = no time limit
        sqlBulkCopy.BatchSize = dt.Rows.Count;    // send all rows to the server as one batch
        sqlBulkCopy.DestinationTableName = "XMLTable";

        sqlConn.Open();
        sqlBulkCopy.WriteToServer(dt);
    }
}

/// <summary>
/// Creates the empty in-memory table used to stage rows: an int column named "Id"
/// followed by a string column named "XMLCol".
/// </summary>
/// <returns>A new DataTable with those two columns and no rows.</returns>
public static DataTable GetTableSchema()
{
    DataTable schema = new DataTable();
    schema.Columns.Add("Id", typeof(int));
    schema.Columns.Add("XMLCol", typeof(string));
    return schema;
}

/// <summary>
/// Returns a pseudo-random integer that is at least <paramref name="start"/> and
/// less than <paramref name="end"/>.
/// </summary>
/// <param name="start">Inclusive lower bound.</param>
/// <param name="end">Exclusive upper bound.</param>
/// <returns>The value produced by Random.Next(start, end).</returns>
public static int GetRandRange(int start, int end)
{
    // A new generator is created on every call, seeded from the hash of a fresh GUID.
    // NOTE(review): presumably done so that calls made in quick succession do not share
    // a time-based seed - confirm before replacing with a shared Random instance.
    int seed = Guid.NewGuid().GetHashCode();
    return new Random(seed).Next(start, end);
}

/// <summary>
/// Builds a small random XML document as text: a TJVICTOR root element holding between
/// 1 and 9 child elements. Each child is named ItemN, carries the attribute
/// v="PropertyN" and the text ValueN, where N is drawn independently from 1..99
/// (so the same N can occur more than once in a document).
/// </summary>
/// <returns>The document text, with the root tags and each item on its own line.</returns>
public static string GetPropertyXML()
{
    StringBuilder buffer = new StringBuilder();
    buffer.AppendLine("<TJVICTOR>");

    // Roll the item count once. Calling GetRandRange inside the loop condition would
    // draw a new bound on every iteration and skew the count towards small values.
    int itemCount = GetRandRange(1, 10);
    for (int count = 0; count < itemCount; count++)
    {
        int baseNum = GetRandRange(1, 100);

        // \" is the escape for a double quote inside a C# string literal; the published
        // listing showed /" here, which does not compile.
        buffer.AppendLine(string.Format("<Item{0} v=\"Property{0}\">Value{0}</Item{0}>", baseNum));
    }

    buffer.AppendLine("</TJVICTOR>");
    return buffer.ToString();
}

/// <summary>
/// Generates the test data set in memory and bulk-loads it: 600000 rows, each with a
/// sequential Id (starting at 1) and an XML document produced by GetPropertyXML.
/// Prints "finished" afterwards and waits for a line of console input before exiting.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Number of rows to generate; lower this to shorten the load and the later index build.
    const int totalRows = 600000;

    DataTable table = GetTableSchema();

    int id = 1;
    while (id <= totalRows)
    {
        // Values are given in column order: Id first, then the XML text.
        table.Rows.Add(id, GetPropertyXML());
        id++;
    }

    BulkToDB(table);

    Console.WriteLine("finished");
    Console.ReadLine();
}

/// <summary>
/// Bulk-inserts every row of <paramref name="dt"/> into the "XMLTable" table, using the
/// connection string named "ConnStr1" from the application configuration.
/// </summary>
/// <param name="dt">
/// Rows to insert. No column mappings are configured, so SqlBulkCopy matches source and
/// destination columns by ordinal position. A null or empty table is a no-op.
/// </param>
/// <remarks>
/// Exceptions raised while opening the connection or copying the rows propagate to the
/// caller unchanged, with their original stack trace.
/// </remarks>
public static void BulkToDB(DataTable dt)
{
    // Check before touching dt.Rows: the guard used to come after dt.Rows.Count had
    // already been read, so a null table crashed instead of being skipped.
    if (dt == null || dt.Rows.Count == 0)
    {
        return;
    }

    string connectionString =
        ConfigurationManager.ConnectionStrings["ConnStr1"].ConnectionString;

    // "using" closes the connection and releases the bulk-copy object on every exit
    // path, which replaces the manual try/finally and the stack-trace-resetting "throw ex".
    using (SqlConnection sqlConn = new SqlConnection(connectionString))
    using (SqlBulkCopy sqlBulkCopy = new SqlBulkCopy(sqlConn))
    {
        sqlBulkCopy.BulkCopyTimeout = 0;          // 0 = no time limit
        sqlBulkCopy.BatchSize = dt.Rows.Count;    // send all rows to the server as one batch
        sqlBulkCopy.DestinationTableName = "XMLTable";

        sqlConn.Open();
        sqlBulkCopy.WriteToServer(dt);
    }
}

/// <summary>
/// Creates the empty in-memory table used to stage rows: an int column named "Id"
/// followed by a string column named "XMLCol".
/// </summary>
/// <returns>A new DataTable with those two columns and no rows.</returns>
public static DataTable GetTableSchema()
{
    DataTable schema = new DataTable();
    schema.Columns.Add("Id", typeof(int));
    schema.Columns.Add("XMLCol", typeof(string));
    return schema;
}

/// <summary>
/// Returns a pseudo-random integer that is at least <paramref name="start"/> and
/// less than <paramref name="end"/>.
/// </summary>
/// <param name="start">Inclusive lower bound.</param>
/// <param name="end">Exclusive upper bound.</param>
/// <returns>The value produced by Random.Next(start, end).</returns>
public static int GetRandRange(int start, int end)
{
    // A new generator is created on every call, seeded from the hash of a fresh GUID.
    // NOTE(review): presumably done so that calls made in quick succession do not share
    // a time-based seed - confirm before replacing with a shared Random instance.
    int seed = Guid.NewGuid().GetHashCode();
    return new Random(seed).Next(start, end);
}

/// <summary>
/// Builds a small random XML document as text: a TJVICTOR root element holding between
/// 1 and 9 child elements. Each child is named ItemN, carries the attribute
/// v="PropertyN" and the text ValueN, where N is drawn independently from 1..99
/// (so the same N can occur more than once in a document).
/// </summary>
/// <returns>The document text, with the root tags and each item on its own line.</returns>
public static string GetPropertyXML()
{
    StringBuilder buffer = new StringBuilder();
    buffer.AppendLine("<TJVICTOR>");

    // Roll the item count once. Calling GetRandRange inside the loop condition would
    // draw a new bound on every iteration and skew the count towards small values.
    int itemCount = GetRandRange(1, 10);
    for (int count = 0; count < itemCount; count++)
    {
        int baseNum = GetRandRange(1, 100);

        // \" is the escape for a double quote inside a C# string literal; the published
        // listing showed /" here, which does not compile.
        buffer.AppendLine(string.Format("<Item{0} v=\"Property{0}\">Value{0}</Item{0}>", baseNum));
    }

    buffer.AppendLine("</TJVICTOR>");
    return buffer.ToString();
}


3.执行一条查询语句,注意它的执行时间和执行计划:

-- Ids of all rows whose XML document contains a /TJVICTOR/Item3 element.
select t.Id
from XMLTable as t
where t.XMLCol.exist('/TJVICTOR/Item3') = 1;

由于机器配置不同,所以执行时间不会完全一样,这里只给出执行计划,以供参考:



所有时间都花在了Table Valued Function上,而且还是clustered index scan。

4.给这个表的XML字段加上索引。

view plaincopy to clipboardprint?

-- Primary XML index. It has to exist before any secondary XML index can be created,
-- because each secondary index is built on top of it ("using xml index ...").
create primary xml index IPXML_XMLTable_XMLCol on XMLTable(XMLCol);

-- Secondary XML index of type PATH.
create xml index IXML_XMLTable_XMLCol_Path on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for path;

-- Secondary XML index of type PROPERTY.
create xml index IXML_XMLTable_XMLCol_Property on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for property;

-- Secondary XML index of type VALUE.
create xml index IXML_XMLTable_XMLCol_value on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for value;

--XML主索引
-- Primary XML index. It has to exist before any secondary XML index can be created,
-- because each secondary index is built on top of it ("using xml index ...").
create primary xml index IPXML_XMLTable_XMLCol on XMLTable(XMLCol);

-- Secondary XML index of type PATH.
create xml index IXML_XMLTable_XMLCol_Path on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for path;

-- Secondary XML index of type PROPERTY.
create xml index IXML_XMLTable_XMLCol_Property on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for property;

-- Secondary XML index of type VALUE.
create xml index IXML_XMLTable_XMLCol_value on XMLTable(XMLCol)
    using xml index IPXML_XMLTable_XMLCol for value;

注意:由于我们表中已经有60万条数据,所以建索引时间会很久,而且会占用大量内存和磁盘,本人就花费了10分钟左右,占了1G内存,和1.3G磁盘。请大家建索引时注意自己的硬盘空间,或者修改前面插入数据的程序,少插入一些数据。

5.重新执行上面的Sql语句:

-- Ids of all rows whose XML document contains a /TJVICTOR/Item3 element.
select t.Id
from XMLTable as t
where t.XMLCol.exist('/TJVICTOR/Item3') = 1;

你会发现,瞬间就出结果了,下面是执行计划,用到了XML index seek。



总结:建立XML索引后,查询效率会大大提高。经过本人的测试,xml.exist的执行效率最高,基本上提高了一个数量级;其它语句比如xml.query、xml.value等,查询速度提高了一倍左右,总体不是太理想。同时也发现,XML索引太占空间,比如上面的60万条记录,空间占用情况如下:

name rows reserved data index_size unused
XMLTable 600000 1479688 KB 160952 KB 1318184 KB 552 KB
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: