您的位置：首页 > 数据库

postgresql 10 分区探密

2016-12-10 23:02 381 查看

postgresql官方终于要出分区了，开发线上已经看到提交分区代码了，下一个版本10带有分区功能应该没问题了，那么这个分区功能如何呢？且和我深入源码一探究竟。

原有分区介绍

分区用法介绍

首先介绍下原有的“分区”功能，这个很早就有了，是通过继承表创建子表的方式曲线实现的分区，例子如下：

create table tbl(
a int,
b varchar(10)
);
create table tbl_1 (
check ( a <= 1000 )
) INHERITS (tbl);
create table tbl_2 (
check ( a <= 10000 and a >1000 )
) INHERITS (tbl);
create table tbl_3 (
check ( a <= 100000 and a >10000 )
) INHERITS (tbl);
create table tbl_4 (
check ( a <= 1000000 and a >100000 )
) INHERITS (tbl);

再通过创建触发器或者规则，实现数据分发，只需要向主表插入数据，可以自动分发到子分区表中，下面以触发器为例如下：

CREATE OR REPLACE FUNCTION tbl_part_tg()
RETURNS TRIGGER AS $$
BEGIN
IF ( NEW. a <= 1000 ) THEN
INSERT INTO tbl_1 VALUES (NEW.*);
ELSIF ( NEW. a > 1000 and NEW.a <= 10000 ) THEN
INSERT INTO tbl_2 VALUES (NEW.*);
ELSIF ( NEW. a > 10000 and NEW.a <= 100000 ) THEN
INSERT INTO tbl_3 VALUES (NEW.*);
ELSIF ( NEW. a > 100000 and NEW.a <= 1000000 ) THEN
INSERT INTO tbl_4 VALUES (NEW.*);
ELSE
RAISE EXCEPTION 'data out of range!';
END IF;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;

CREATE TRIGGER insert_tbl_part_tg
BEFORE INSERT ON tbl
FOR EACH ROW EXECUTE PROCEDURE tbl_part_tg();

这样一个postgres分区表就创建完毕，对应用来说透明的，插入查询都对主表操作，非常方便。

如何实现分区过滤

对于分区表来说，最大的好处在于分区剪枝功能，如果有50个分区表，对于某个条件值如果能确定，那么很可能就直接过滤掉了49个分区，大大提高扫描速度，当然也能将不同子分区表放在不同物理盘上，提高IO速度。那么查询是怎么实现子分区表过滤的呢？答案是约束排除。是否使用约束排除由constraint_exclusion 参数控制，它有三个可设值：on、off、partition。on代表无条件打开，所有情况都会检查约束；off代表关闭，所有约束都不会用于排除；partition代表只对分区表（或者说继承表）进行约束排除，其它表则不会。因为检查约束在生成计划时会有额外开销，为了精准控制才有了这三个取值，默认值是partition，即只对分区表做约束排除。

如：select * from tbl where a = 12345; 首先找到主表tbl，然后通过tbl找到它的子表，找到后再拿着谓词条件a = 12345对一个个子表的约束进行检查，不符合条件的表就去掉不扫描，实现分区表过滤，下面简单介绍下约束排除的源码逻辑。

过滤代码分析

//从set_rel_size 基表大小估计函数开始介绍
static void
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
//检查是否需要扫描
if (rel->reloptkind == RELOPT_BASEREL &&
relation_excluded_by_constraints(root, rel, rte)) //检查约束是否能排除掉该表
{
set_dummy_rel_pathlist(rel); //可以排除，不需要扫描该表
}
else if (rte->inh)//检查是否有子表
{
set_append_rel_size(root, rel, rti, rte);//有子表则开始检查所有子表并把不需要的去掉
}

//设置需要扫描的表
static void
set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int         parentRTindex = rti;
... 为减少篇幅，忽略不重要代码
...

foreach(l, root->append_rel_list)
{   //遍历所有，root->append_rel_list 是含父表、子表所有relation的list,在前面已经准备好
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int         childRTindex;
...
//拿到真实条件表达式，如这个用例就是拿到 a = 12345 这个条件
childquals = get_all_actual_clauses(rel->baserestrictinfo);
//调整append的relation属性，可能需要对一些特殊的表达式或查询结构复制一份并转换，本用例中不涉及
childquals = (List *) adjust_appendrel_attrs(root,
(Node *) childquals,
appinfo);

//常量表达式处理，对一些常量表达式会将值直接算出来
//显然本例中a=12345是一个列的OpExpr表达式    ，因此这里不会发生改变
childqual = eval_const_expressions(root, (Node *)
make_ands_explicit(childquals));

//下面条件成立直接判断不需要扫描该表，本例中均不会成立
if (childqual && IsA(childqual, Const) &&
(((Const *) childqual)->constisnull ||
!DatumGetBool(((Const *) childqual)->constvalue)))
{
set_dummy_rel_pathlist(childrel);
continue;
}

//可以简单认为make_ands_implicit与上面make_ands_explicit互逆
childquals = make_ands_implicit((Expr *) childqual);
//根据clause生成一个RestrictInfo结构
childquals = make_restrictinfos_from_actual_clauses(root,
childquals);
childrel->baserestrictinfo = childquals;

//检查约束是否能排除掉该表，即判断某个分区是否需要扫描
if (relation_excluded_by_constraints(root, childrel, childRTE))
{
set_dummy_rel_pathlist(childrel);//进来即是排除掉，继续下一个表检查
continue;
}
...
...
//若约束无法排除掉某个分区，后续代码继续正常执行分区表相关计算

//表约束排查函数relation_excluded_by_constraints 简介
bool
relation_excluded_by_constraints(PlannerInfo *root,
RelOptInfo *rel, RangeTblEntry *rte)
{
List       *safe_restrictions;
...

//初步判断是否需要进行约束排除，return false则是不能，如constraint_exclusion 是off状态时
//这时根本没开约束排除功能，约束自然不能生效
if (constraint_exclusion == CONSTRAINT_EXCLUSION_OFF ||
(constraint_exclusion == CONSTRAINT_EXCLUSION_PARTITION &&
!(rel->reloptkind == RELOPT_OTHER_MEMBER_REL ||
(root->hasInheritedTarget &&
rel->reloptkind == RELOPT_BASEREL &&
rel->relid == root->parse->resultRelation))))
return false;

//检查 谓词条件（a=12345）调用的函数是否结果稳定，若稳定结果则将条件挂到safe_restrictions上
safe_restrictions = NIL;
foreach(lc, rel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
if (!contain_mutable_functions((Node *) rinfo->clause))
safe_restrictions = lappend(safe_restrictions, rinfo->clause);
}
//检查safe_restrictions条件本身是不是冲突，自身冲突则排除掉
if (predicate_refuted_by(safe_restrictions, safe_restrictions))
return true;

/* Only plain relations have constraints */
if (rte->rtekind != RTE_RELATION || rte->inh)
return false;

//把这个表的约束取出来
constraint_pred = get_relation_constraints(root, rte->relid, rel, true);

//检查这些约束的条件所调用的函数结果是否稳定，不稳定的不能作为排查条件
safe_constraints = NIL;
foreach(lc, constraint_pred)
{
Node       *pred = (Node *) lfirst(lc);
if (!contain_mutable_functions(pred))
safe_constraints = lappend(safe_constraints, pred);
}

//约束条件和谓词条件进行排查，如果冲突得返回true去掉该分区表
//如：约束条件为 a>1000 and a<=10000，谓词条件为a=12345，它们冲突则返回true
if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo))
return true;

上面的例子如下，有四个分区的表，直接定位到了子表tbl_3，注意父表不能过滤，默认都要扫描，但如果分区表设计合理，父表不应该有数据，扫描代价为0，对性能几乎无影响

postgres=# explain select  *from tbl where a =11111;
QUERY PLAN
-------------------------------------------------------------
Append  (cost=0.00..24.50 rows=7 width=42)
->  Seq Scan on tbl  (cost=0.00..0.00 rows=1 width=42)
Filter: (a = 11111)
->  Seq Scan on tbl_3  (cost=0.00..24.50 rows=6 width=42)
Filter: (a = 11111)
(5 rows)

简单来说，postgresql原来分区是通过谓词条件和表上约束条件之间关系实现过滤的，当然这只能实现静态剪枝。

如何实现数据分发

基于规则的话，会在查询重写阶段按规则替换生成新的插入语句；基于触发器的话，会在insert主表前触发另外一个insert操作。这两个逻辑都比较简单，相关代码不再介绍。

postgresql10分区介绍

那么postgres10分区具有什么样的功能呢？先看用例

LIST分区语法

postgres=# CREATE TABLE list_parted (
postgres(# a int
postgres(# ) PARTITION BY LIST (a);
CREATE TABLE
postgres=# CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN (1);
CREATE TABLE
postgres=# CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2);
CREATE TABLE
postgres=# CREATE TABLE part_3 PARTITION OF list_parted FOR VALUES IN (3);
CREATE TABLE
postgres=# CREATE TABLE part_4 PARTITION OF list_parted FOR VALUES IN (4);
CREATE TABLE
postgres=# CREATE TABLE part_5 PARTITION OF list_parted FOR VALUES IN (5);
CREATE TABLE
postgres=#
postgres=# insert into list_parted values(32); --failed
ERROR:  no partition of relation "list_parted" found for row
DETAIL:  Failing row contains (32).
postgres=# insert into part_1 values(1);
INSERT 0 1
postgres=# insert into part_1 values(2);--failed
ERROR:  new row for relation "part_1" violates partition constraint
DETAIL:  Failing row contains (2).
postgres=# explain select *from list_parted where a =1;
QUERY PLAN
-----------------------------------------------------------------
Append  (cost=0.00..41.88 rows=14 width=4)
->  Seq Scan on list_parted  (cost=0.00..0.00 rows=1 width=4)
Filter: (a = 1)
->  Seq Scan on part_1  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 1)
(5 rows)

上面是LIST表，建表是先建主表，再建子表，子表以 PARTITION OF 方式说明和主表关系，约束条件应该就是后面的in里面，再来个范围表的例子。

RANGE分区语法

postgres=# CREATE TABLE range_parted (
postgres(# a int
postgres(# ) PARTITION BY RANGE (a);
CREATE TABLE
postgres=# CREATE TABLE range_parted1 PARTITION OF range_parted FOR VALUES from (1) TO (1000);
CREATE TABLE
postgres=# CREATE TABLE range_parted2 PARTITION OF range_parted FOR VALUES FROM (1000) TO (10000);
CREATE TABLE
postgres=# CREATE TABLE range_parted3 PARTITION OF range_parted FOR VALUES FROM (10000) TO (100000);
CREATE TABLE
postgres=#
postgres=# insert into range_parted1 values(343);
INSERT 0 1
postgres=#
postgres=# explain select *from range_parted where a=32425;
QUERY PLAN
---------------------------------------------------------------------
Append  (cost=0.00..41.88 rows=14 width=4)
->  Seq Scan on range_parted  (cost=0.00..0.00 rows=1 width=4)
Filter: (a = 32425)
->  Seq Scan on range_parted3  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 32425)
(5 rows)
postgres=# set constraint_exclusion = off;
SET
postgres=# explain select *from range_parted where a=32425;
QUERY PLAN
---------------------------------------------------------------------
Append  (cost=0.00..125.63 rows=40 width=4)
->  Seq Scan on range_parted  (cost=0.00..0.00 rows=1 width=4)
Filter: (a = 32425)
->  Seq Scan on range_parted1  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 32425)
->  Seq Scan on range_parted2  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 32425)
->  Seq Scan on range_parted3  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 32425)
(9 rows)

和LIST差不多，就是语法略有不同，范围表值是一个连续的范围，LIST表是单点或多点的集合。从上面例子可以看到，显然还是走的约束排除过滤子表的方式。

HASH分区语法

postgres=# CREATE TABLE hash_parted (
postgres(# a int
postgres(# ) PARTITION BY HASH (a);
ERROR:  unrecognized partitioning strategy "hash"
postgres=#
postgres=#
postgres=# CREATE TABLE cccc_parted (
postgres(# a int
postgres(# ) PARTITION BY cccc (a);
ERROR:  unrecognized partitioning strategy "cccc"
postgres=#

HASH分区语法还不支持，以后或许会支持。

postgresql10语法小结

与原来老的建分区方式比，简单了不少，不用建约束了，应该是内部创建了。插入能自动计算出子表插入位置，应该是在执行器中增加了根据给定值直接计算出目标分区的逻辑，提高了性能。分区过滤和原来一样，通过约束排除，目前没看到增强迹象，但毕竟只是开始。总的来说语法功能初步完善，但这仅是一个开始，以后肯定会越来越丰富强大的。

postgresql10性能简单测试

这里对分区测试主要关注两个点，1是分区剪枝，2是导入数据性能，参考德哥测试方法结果如下：

测试环境：pg10 DEBUG版，全默认编译，win7 i74770s cpu，普通硬盘。

传统方式建分区1000个，时间33.65秒，建规则22.2秒，总时间55.85秒。

第一个分区插入100W行用时185.75秒，第996个分区488.9秒。

第一个分区select用时172ms，几乎全是生成计划时间

第996个分区select用时171 ms，几乎全是生成计划时间

pg10分区方式建分区1000个，时间40.2秒，总时间40.2秒，环境同上。

第一个分区插入100W行用时3.5秒，第996个分区4.72秒。

第一个分区select用时133.5ms，几乎全是生成计划时间

第996个分区select用时133.8 ms，几乎全是生成计划时间

总的来说，新老分区剪枝走的策略一样，提升有限，但还是略有一点，主要是pg10计划时间稍短点，但insert性能实实在在的实现了质的飞跃, 两个数量级差别！

测试实况如下：

传统方式
create table test1(id int8, info text, crt_time timestamp);
do language plpgsql $$
declare
i int;
begin
for i in 1..1000 loop
execute 'create table test1_'||i||'(like test1 including all) inherits(test1)';
execute 'alter table test1_'||i||' add constraint ck_test1_'||i||' check(id>='||20000000::int8*(i-1)+1||' and id<'||20000000::int8*i+1||')';
end loop;
end;
$$;
--规则
do language plpgsql $$
declare
i int;
begin
for i in 1..1000 loop
execute 'create or replace rule r'||i||' as on insert to test1 where id >= '||20000000::int8*(i-1)+1||' and id<'||20000000::int8*i+1||' do instead (insert into test1_'||i||' values (new.id,new.info,new.crt_time))';
end loop;
end;
$$;
postgres=# insert into test1 select generate_series (19990000,21000000);
INSERT 0 0
Time: 185755.382 ms (03:05.755)

postgres=# insert into test1 select generate_series (19919990000,19921000000);
INSERT 0 0
Time: 488937.560 ms (08:08.938)

postgres=# explain analyze select *from test1 where id=20000000;
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Append  (cost=0.00..170.01 rows=2 width=48) (actual time=1.493..1.493 rows=1 loops=1)
->  Seq Scan on test1  (cost=0.00..0.00 rows=1 width=48) (actual time=0.163..0.163 rows=0 loops=1)
Filter: (id = 20000000)
->  Seq Scan on test1_1  (cost=0.00..170.01 rows=1 width=48) (actual time=1.328..1.328 rows=1 loops=1)
Filter: (id = 20000000)
Rows Removed by Filter: 10000
Planning time: 166.882 ms
Execution time: 1.552 ms
(8 rows)
Time: 172.326 ms

postgres=# explain analyze select *from test1 where id=19919990000;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Append  (cost=0.00..170.01 rows=2 width=48) (actual time=0.155..1.628 rows=1 loops=1)
->  Seq Scan on test1  (cost=0.00..0.00 rows=1 width=48) (actual time=0.135..0.135 rows=0 loops=1)
Filter: (id = '19919990000'::bigint)
->  Seq Scan on test1_996  (cost=0.00..170.01 rows=1 width=48) (actual time=0.019..1.491 rows=1 loops=1)
Filter: (id = '19919990000'::bigint)
Rows Removed by Filter: 10000
Planning time: 165.187 ms
Execution time: 1.690 ms
(8 rows)
Time: 171.021 ms

pg10分区方式
postgres=# create table test(id int8, info text, crt_time timestamp)partition by range(id);
CREATE TABLE
Time: 41.593 ms
postgres=#
postgres=#  do language plpgsql $$
postgres$# declare
postgres$#   i int;
postgres$# begin
postgres$#   for i in 1..1000 loop
postgres$#     execute 'create table test_'||i||' PARTITION OF test FOR VALUES FROM ('||20000000::int8*(i-1)+1||') to ('||20000000::int8*i+1||')';
postgres$#   end loop;
postgres$# end;
postgres$# $$;
DO
Time: 40272.109 ms (00:40.272)

postgres=# insert into test select generate_series (19990000,21000000);
INSERT 0 1010001
Time: 3500.975 ms (00:03.501)
postgres=# insert into test select generate_series (19919990000,19921000000);
INSERT 0 1010001
Time: 4723.782 ms (00:04.724)

postgres=#  explain analyze select *from test where id=20000000;
QUERY PLAN
---------------------------------------------------------------------------------------------------------
Append  (cost=0.00..170.01 rows=2 width=48) (actual time=1.449..1.450 rows=1 loops=1)
->  Seq Scan on test  (cost=0.00..0.00 rows=1 width=48) (actual time=0.121..0.121 rows=0 loops=1)
Filter: (id = 20000000)
->  Seq Scan on test_1  (cost=0.00..170.01 rows=1 width=48) (actual time=1.328..1.328 rows=1 loops=1)
Filter: (id = 20000000)
Rows Removed by Filter: 10000
Planning time: 128.244 ms
Execution time: 1.491 ms
(8 rows)
Time: 133.588 ms

postgres=#  explain analyze select *from test where id=19919990000;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Append  (cost=0.00..170.01 rows=2 width=48) (actual time=0.153..1.566 rows=1 loops=1)
->  Seq Scan on test  (cost=0.00..0.00 rows=1 width=48) (actual time=0.136..0.136 rows=0 loops=1)
Filter: (id = '19919990000'::bigint)
->  Seq Scan on test_996  (cost=0.00..170.01 rows=1 width=48) (actual time=0.015..1.429 rows=1 loops=1)
Filter: (id = '19919990000'::bigint)
Rows Removed by Filter: 10000
Planning time: 128.298 ms
Execution time: 1.623 ms
(8 rows)
Time: 133.836 ms

postgresql10分区相关代码简析

parser语法篇：

AlterTableStmt:
...
|   ALTER TABLE relation_expr partition_cmd
|   ALTER TABLE IF_P EXISTS relation_expr
partition_cmd:
ATTACH PARTITION qualified_name ForValues
...
| DETACH PARTITION qualified_name
...
;
...

alter table 增加了分区支持，如：

ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);

注意这里的子表part_2表得先建好，然后才能alter table 挂到主表上。

CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace
...
| CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
OptPartitionElementList ForValues OptPartitionSpec OptWith
OnCommitOption OptTableSpace
...

OptPartitionSpec: PartitionSpec { $$ = $1; }
| /*EMPTY*/			{ $$ = NULL; }
;

PartitionSpec: PARTITION BY part_strategy '(' part_params ')'
...
;

part_strategy:  IDENT                   { $$ = $1; }
| unreserved_keyword	{ $$ = pstrdup($1); }
;

create table在OptInherit 后面OptPartitionSpec 说明的是主表是一个分区表，ForValues 后面的OptPartitionSpec 则说明子分区的下面还可以挂子分区，至少代码上表现是这样的，试了试效果如下：

postgres=#  CREATE TABLE range_list (
postgres(# a int
postgres(# ) PARTITION BY RANGE (a);
CREATE TABLE
postgres=# CREATE TABLE range_pa1 PARTITION OF range_list FOR VALUES from (1) TO (1000)  PARTITION BY LIST (a);;
CREATE TABLE
postgres=# CREATE TABLE range_pa2 PARTITION OF range_list FOR VALUES FROM (1000) TO (10000);
CREATE TABLE
postgres=# CREATE TABLE range_list1  PARTITION OF range_pa1 FOR VALUES IN (10);
CREATE TABLE
postgres=# CREATE TABLE range_list2  PARTITION OF range_pa1 FOR VALUES IN (20);
CREATE TABLE
postgres=# insert into  range_pa1 values(20);
INSERT 0 1
postgres=# insert into range_list2 values(20);
INSERT 0 1
postgres=# explain select *from range_list where a =20;
QUERY PLAN
-------------------------------------------------------------------
Append  (cost=0.00..41.88 rows=15 width=4)
->  Seq Scan on range_list  (cost=0.00..0.00 rows=1 width=4)
Filter: (a = 20)
->  Seq Scan on range_pa1  (cost=0.00..0.00 rows=1 width=4)
Filter: (a = 20)
->  Seq Scan on range_list2  (cost=0.00..41.88 rows=13 width=4)
Filter: (a = 20)
(7 rows)

果然能支持多级分区，且各级子分区都能起到过滤作用，但是子分区都能插入数据，即使它的下面还有下一级子分区。

CreateForeignTableStmt:
...
| CREATE FOREIGN TABLE qualified_name
PARTITION OF qualified_name OptPartitionElementList ForValues
SERVER name create_generic_options
...
| CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name
PARTITION OF qualified_name OptPartitionElementList ForValues
SERVER name create_generic_options

看起来似乎是支持直接把外部表作为某个表的子表，这个功能若好用会很实用，不过还没做测试。

DefineRelation分区建表篇：

这是建表DDL函数，建一个普通表都会进入该接口,里面增加了分区处理信息的逻辑.

ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
ObjectAddress *typaddress, const char *queryString)
{           //增加了queryString存建表语句，主表不用，子表创建时用
char        relname[NAMEDATALEN];
Oid         namespaceId;
List       *schema = stmt->tableElts;
...//中间省略部分代码
if (stmt->partbound)//如果有分区键约束信息
{
Node       *bound;
ParseState *pstate;
Oid         parentId = linitial_oid(inheritOids);
Relation    parent;

parent = heap_open(parentId, NoLock);
if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("\"%s\" is not partitioned",
RelationGetRelationName(parent))));

pstate = make_parsestate(NULL);
pstate->p_sourcetext = queryString;
//解析处理分区条件，如  from (1) to (10) 转成一个list
bound = transformPartitionBound(pstate, parent, stmt->partbound);
//分区范围检查，如是否和已有分区冲突等
check_new_partition_bound(relname, parent, bound);
heap_close(parent, NoLock);

//保存子分区范围信息，实际上子表已经在上面创建好了，这里把分区范围信息更新就行
//pg_class中新增加了一列relpartbound存范围信息，这里存的是一个由Node转出来的字符串结构
StorePartitionBound(rel, bound);

//更新本地立即可见
CommandCounterIncrement();
}
if (stmt->partspec)
{
char            strategy;
...
//解析处理分区键，如 partition by range(a),将这个转成一个PartitionSpec Node。
//这里有一点实现比较奇怪，对于分区类型，list,range,在parser语法解析阶段并不精确确定，存的是一个
//字符串，进入此函数中再做字符串比较，只允许list,range这两种情况，这为什么不放到gram.y中处理？
//放gram.y中处理更高效，也更清晰
stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
&strategy);
//计算分区键属性值，拿着分区键去pg_attribute 表找全信息
ComputePartitionAttrs(rel, stmt->partspec->partParams,
partattrs, &partexprs, partopclass,
partcollation);
//分区键个数
partnatts = list_length(stmt->partspec->partParams);
//把分区信息存到 pg_partitioned_table 表中
StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
partopclass, partcollation);

//如果是range分区，分区键对应的列必须设成非空
if (strategy == PARTITION_STRATEGY_RANGE)
{
for (i = 0; i < partnatts; i++)
{
AttrNumber  partattno = partattrs[i];
Form_pg_attribute attform = descriptor->attrs[partattno-1];

if (partattno != 0 && !attform->attnotnull)
{
/* Add a subcommand to make this one NOT NULL */
AlterTableCmd *cmd = makeNode(AlterTableCmd);

cmd->subtype = AT_SetNotNull;
cmd->name = pstrdup(NameStr(attform->attname));
cmds = lappend(cmds, cmd);
}
}
/*
* Although, there cannot be any partitions yet, we still need to
* pass true for recurse; ATPrepSetNotNull() complains if we don't
*/
if (cmds != NIL)
AlterTableInternal(RelationGetRelid(rel), cmds, true);
}
}
...
return address;
}

整体来说，建表逻辑主要增加了对主表的分区键处理逻辑（partition by range 和partition by list)和对子表的分区范围处理逻辑（for values from …to … 和for values in ..)。

分区键相关信息放在pg_partitioned_table系统表中，而分区范围值放在pg_class的relpartbound中，并且建主表时是不知道有多少子表的，也不需要知道，使用时可以动态取到，这对list、range分区表没问题，但是对于hash分区表就不行了，子表数不定就无法确定数据分发规则，因此基于这一套逻辑基本是无法实现hash分区表。

Select 查询篇

当对分区表执行查询时，如果constraint_exclusion 设置为on或者partition时，最终会在relation_excluded_by_constraints 函数中通过约束排除掉不需要扫描的表，排除逻辑和原来继承表的逻辑是一样的，不再赘述。其中关键的区别在于约束或者分区范围的来源途径不同，不管是普通表还是分区表，约束都在这里拿到：

constraint_pred = get_relation_constraints(root, rte->relid, rel, true);

在get_relation_constraints函数里面会区别约束和分区表范围，具体请看代码如下：

static List *
get_relation_constraints(PlannerInfo *root,
Oid relationObjectId, RelOptInfo *rel,
bool include_notnull)
{
List       *result = NIL;
Index       varno = rel->relid;
Relation    relation;
TupleConstr *constr;
List        *pcqual;

relation = heap_open(relationObjectId, NoLock);
//constr取到约束结构，普通表的
constr = relation->rd_att->constr;
//当然上面的约束还不是可直接用的，还需要转换成约束排除接口可用的
if (constr != NULL)
{
//如果这个表有约束则在这里面进行转换计算
}

//如果是分区表，那么下面的pcqual将取到分区范围，实际上这个pcqual取到的就是
//relation->rd_partcheck,如果这个值不空就直接拿，为空说明是第一次拿，还要从系统表中取一下，
//从pg_class的relpartbound字段拿到，就是上面建表时存的。
pcqual = RelationGetPartitionQual(relation, true);
if (pcqual)
{
...
//拿到后进行一些简单计算处理
}
}
//最终返回和约束完全相同的结构，分区范围也是表的一种约束，但与普通约束分开逻辑更加清晰，以后扩展功能也更方便

分区表Insert篇

前面性能测试中insert性能与继承表+规则实现的分区相比有了质的飞跃，性能提升近2个数量级，初步猜测执行时增加了计算插入目标分区功能，实际情况如何呢？请看下面ExecInsert插入函数

static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
List *arbiterIndexes,
OnConflictAction onconflict,
EState *estate,
bool canSetTag)
{
HeapTuple   tuple;
ResultRelInfo *resultRelInfo;
ResultRelInfo *saved_resultRelInfo = NULL;
Relation    resultRelationDesc;
Oid         newId;
...
//如果有分区进入下面逻辑
if (mtstate->mt_partition_dispatch_info)
{
int     leaf_part_index;
...
//直接用要插入的值slot去找目标分区
leaf_part_index = ExecFindPartition(resultRelInfo,
mtstate->mt_partition_dispatch_info,
slot,
estate);
...
//mtstate->mt_partitions上存的是所有的分区ResultRelInfo结构，leaf_part_index是代表
//目标表是第几个，直接跳转到目标所在的内存取到目标分区表ResultRelInfo结构
resultRelInfo = mtstate->mt_partitions + leaf_part_index;
...
}
//其它基本同原来insert逻辑

int     //找分区
ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
TupleTableSlot *slot, EState *estate)
{
int     result;
...
//找到是第几个分区
result = get_partition_for_tuple(pd, slot, estate, &failed_at);
if (result < 0)
{
...
//负数即分区不存在，报错，要插的值有问题
}
//返回要插入的分区编号
return result;
}
//get_partition_for_tuple函数中怎么找不再具体介绍，大意是拿着要插的分区键值去和所有的分区条件做比较，以二分法搜索，找到符合的就返回分区编号，如第三个符合则返回3

显然可以看到，在执行时增加了计算插入目标分区的函数，直接确定插入哪个分区，因此插入性能暴涨。

对于update表操作，实际就是select+insert操作，新分区表目前对这块并没做优化，这块不再介绍。

postgresql10分区改进思考

范围键限制性太强，不管是list分区还是range分区，范围都只能指定简单数值

分区键指定时带有函数，似乎不能剪枝（还需要仔细测试）

hash分区支持，hash分区在有些场景下实用性很强。

分区表信息分开存放，分区子表不一定要是一个完全功能的表，子表可不放pg_class，新建一个分区系统表，与pg_class分开，管理起来更方便。对于分区表使用多的应用场景，也不至于出现\d一下成千上万个表不停的在刷出来。

动态剪枝，目前来说是不支持分区动态剪枝的，但有些场景，是没办法在计划时确定条件值的，若不能动态调整计划就没办法进行裁剪。以后或许可以允许执行时，再回调某些计划函数，动态调整执行计划。

以上是基于postgresql10分区当前已经提交代码的一些个人见解和想法，不一定正确，欢迎大家讨论。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航