Saturday, February 5, 2022

Performance impact when too many nested partitions created in PostgreSQL

 

Use below scripts to setup the table

Script shortened for readability, refer to GitHub repo for complete code.

create table t1 (id int,amount numeric);

-- create partition with 1000 subpartitions
create table t2 (id int,amount numeric) partition by range(id);
/****
-- bash script to generate subpartitions
for i in {1..1000}
do
echo "create table t2_p_${i} partition of t2 for values from ($i) to ($((i+1)));"
done
***/
create table t2_p_1 partition of t2 for values from (1) to (2);
create table t2_p_2 partition of t2 for values from (2) to (3);
create table t2_p_3 partition of t2 for values from (3) to (4);
create table t2_p_4 partition of t2 for values from (4) to (5);
...
create table t2_p_997 partition of t2 for values from (997) to (998);
create table t2_p_998 partition of t2 for values from (998) to (999);
create table t2_p_999 partition of t2 for values from (999) to (1000);
create table t2_p_1000 partition of t2 for values from (1000) to (1001);



-- create partition with 1000 level of subpartitions
create table t3 (id int,amount numeric) partition by range(id);
create table t3_p_1 partition of t3 for values from (1) to (1001) partition by range(id);
/****
-- bash script to generate subpartitions
for i in {2..999}
do
echo "create table t3_p_${i} partition of t3_p_$((i-1)) for values from ($i) to (1001) partition by range(id);"
done
****/

create table t3_p_2 partition of t3_p_1 for values from (2) to (1001) partition by range(id);
create table t3_p_3 partition of t3_p_2 for values from (3) to (1001) partition by range(id);
create table t3_p_4 partition of t3_p_3 for values from (4) to (1001) partition by range(id);
create table t3_p_5 partition of t3_p_4 for values from (5) to (1001) partition by range(id);
create table t3_p_6 partition of t3_p_5 for values from (6) to (1001) partition by range(id);
...
create table t3_p_996 partition of t3_p_995 for values from (996) to (1001) partition by range(id);
create table t3_p_997 partition of t3_p_996 for values from (997) to (1001) partition by range(id);
create table t3_p_998 partition of t3_p_997 for values from (998) to (1001) partition by range(id);
create table t3_p_999 partition of t3_p_998 for values from (999) to (1001) partition by range(id);
create table t3_p_1000 partition of t3_p_999 for values from (1000) to (1001);

Performance impact about insert

mytest=> explain analyze verbose insert into t1 values (1000,0);
                                            QUERY PLAN
--------------------------------------------------------------------------------------------------
 Insert on public.t1  (cost=0.00..0.01 rows=1 width=36) (actual time=0.069..0.070 rows=0 loops=1)
   ->  Result  (cost=0.00..0.01 rows=1 width=36) (actual time=0.002..0.002 rows=1 loops=1)
         Output: 1000, '0'::numeric
 Planning Time: 0.022 ms
 Execution Time: 0.108 ms
(5 rows)


mytest=> explain analyze verbose insert into t2 values (1000,0);
                                            QUERY PLAN
--------------------------------------------------------------------------------------------------
 Insert on public.t2  (cost=0.00..0.01 rows=1 width=36) (actual time=0.297..0.297 rows=0 loops=1)
   ->  Result  (cost=0.00..0.01 rows=1 width=36) (actual time=0.003..0.003 rows=1 loops=1)
         Output: 1000, '0'::numeric
 Planning Time: 0.028 ms
 Execution Time: 4.936 ms
(5 rows)


mytest=> explain analyze verbose insert into t3 values (1000,0);
                                              QUERY PLAN
------------------------------------------------------------------------------------------------------
 Insert on public.t3  (cost=0.00..0.01 rows=1 width=36) (actual time=780.102..780.103 rows=0 loops=1)
   ->  Result  (cost=0.00..0.01 rows=1 width=36) (actual time=0.001..0.003 rows=1 loops=1)
         Output: 1000, '0'::numeric
 Planning Time: 0.029 ms
Execution Time: 780.885 ms
(5 rows)

From above, we can see the execution time for nested sub-partitions severely impacted. Below examples demonstrated 500x times slower comparing to non-partitioned table.

mytest=> do
mytest-> $$
mytest$> begin
mytest$>   for i in 1..1000 loop
mytest$>     insert into t1 values(1000,i::numeric);
mytest$>   end loop;
mytest$> end;
mytest$> $$;
DO
Time: 250.741 ms

mytest=> do
mytest-> $$
mytest$> begin
mytest$>   for i in 1..1000 loop
mytest$>     insert into t2 values(1000,i::numeric);
mytest$>   end loop;
mytest$> end;
mytest$> $$;
DO
Time: 255.405 ms

mytest=> do
mytest-> $$
mytest$> begin
mytest$>   for i in 1..1000 loop
mytest$>     insert into t3 values(1000,i::numeric);
mytest$>   end loop;
mytest$> end;
mytest$> $$;
DO
Time: 129140.941 ms (02:09.141)

Performance impact about SELECT


mytest=> explain analyze verbose select * from t1 where id=1000;
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Seq Scan on public.t1  (cost=0.00..25.88 rows=6 width=36) (actual time=0.011..0.012 rows=1 loops=1)
   Output: id, amount
   Filter: (t1.id = 1000)
 Planning Time: 0.055 ms
 Execution Time: 0.057 ms
(5 rows)


mytest=> explain analyze verbose select * from t2 where id=1000;
                                                  QUERY PLAN
---------------------------------------------------------------------------------------------------------------
 Seq Scan on public.t2_p_1000 t2  (cost=0.00..25.88 rows=6 width=36) (actual time=0.011..0.012 rows=1 loops=1)
   Output: t2.id, t2.amount
   Filter: (t2.id = 1000)
 Planning Time: 0.067 ms
 Execution Time: 0.065 ms
(5 rows)


mytest=> explain analyze verbose select * from t3 where id=1000;
                                                  QUERY PLAN
---------------------------------------------------------------------------------------------------------------
 Seq Scan on public.t3_p_1000 t3  (cost=0.00..25.88 rows=6 width=36) (actual time=0.036..0.037 rows=1 loops=1)
   Output: t3.id, t3.amount
   Filter: (t3.id = 1000)
 Planning Time: 723.508 ms
 Execution Time: 0.607 ms
(5 rows)

Performance impact about UPDATE

mytest=> explain analyze verbose update t1 set amount=1 where id=1000;
                                                QUERY PLAN
-----------------------------------------------------------------------------------------------------------
 Update on public.t1  (cost=0.00..25.88 rows=6 width=42) (actual time=0.031..0.031 rows=0 loops=1)
   ->  Seq Scan on public.t1  (cost=0.00..25.88 rows=6 width=42) (actual time=0.014..0.015 rows=1 loops=1)
         Output: id, '1'::numeric, ctid
         Filter: (t1.id = 1000)
 Planning Time: 0.047 ms
 Execution Time: 0.091 ms
(6 rows)


mytest=> explain analyze verbose update t2 set amount=1 where id=1000;
                                                      QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
 Update on public.t2  (cost=0.00..25.88 rows=6 width=42) (actual time=0.037..0.037 rows=0 loops=1)
   Update on public.t2_p_1000 t2_1
   ->  Seq Scan on public.t2_p_1000 t2_1  (cost=0.00..25.88 rows=6 width=42) (actual time=0.013..0.014 rows=1 loops=1)
         Output: t2_1.id, '1'::numeric, t2_1.ctid
         Filter: (t2_1.id = 1000)
 Planning Time: 0.080 ms
 Execution Time: 0.095 ms
(7 rows)


mytest=> explain analyze verbose update t3 set amount=1 where id=1000;
                                                      QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
 Update on public.t3  (cost=0.00..25.88 rows=6 width=42) (actual time=2.761..2.762 rows=0 loops=1)
   Update on public.t3_p_1000 t3_1
   ->  Seq Scan on public.t3_p_1000 t3_1  (cost=0.00..25.88 rows=6 width=42) (actual time=0.006..0.008 rows=1 loops=1)
         Output: t3_1.id, '1'::numeric, t3_1.ctid
         Filter: (t3_1.id = 1000)
 Planning Time: 630.994 ms
 Execution Time: 9.249 ms
(7 rows)

No comments:

Post a Comment