Tuesday, February 25, 2025

Tips to minimize table lock for DDL on large tables for PostgreSQL

Here are some tips to minimize table lock for DDL on large tables:

  • NOT VALID for FOREIGN KEY/CHECK CONSTRAINT
  • CREATE INDEX CONCURRENTLY
  • UNIQUE/PRIMARY KEY CONSTRAINT using pre-created index


Examples:

mytest=# alter table t add constraint t_fk foreign key (id) references p(id) not valid;
ALTER TABLE
Time: 4.706 ms

mytest=# alter table t validate constraint t_fk;
ALTER TABLE
Time: 279.145 ms
mytest=# create unique index concurrently t_pk on t(id) ;
CREATE INDEX
Time: 201.014 ms

mytest=# alter table t add constraint t_pk primary key using index t_pk;
ALTER TABLE
Time: 4.386 ms

Sunday, February 23, 2025

Compare different PostgreSQL index types for text column

Prepare test case data

-- Create the table
CREATE TABLE website_data (
    table_storage_id BIGINT,
    uri TEXT,
    page_viewer_counter INTEGER
);

-- Function to generate a random number within a given range
CREATE OR REPLACE FUNCTION random_between(low INT, high INT) RETURNS INT AS $$
BEGIN
    RETURN floor(random() * (high - low + 1) + low)::INT;
END;
$$ LANGUAGE plpgsql;

-- Function to generate a random nested path for the URI
CREATE OR REPLACE FUNCTION generate_random_uri() RETURNS TEXT AS $$
DECLARE
    top_tier TEXT[] := ARRAY['/doc', '/prod', '/sales', '/customer'];
    num_layers INT;
    layer TEXT;
    result_uri TEXT;
    i INT;
BEGIN
    -- Select a random top tier
    result_uri := top_tier[random_between(1, array_length(top_tier, 1))];
    -- Generate a random number of layers between 3 and 5
    num_layers := random_between(3, 5);
    FOR i IN 1..num_layers LOOP
        -- Generate a random layer name (a simple alphanumeric string for example)
        layer := md5(random()::text || clock_timestamp()::text)::text;
        result_uri := result_uri || '/' || left(layer, 8);
    END LOOP;
    RETURN result_uri;
END;
$$ LANGUAGE plpgsql;

-- Insert 1 million rows of dummy data
INSERT INTO website_data (table_storage_id, uri, page_viewer_counter)
SELECT 
    generate_series(1, 1000000) AS table_storage_id,
    generate_random_uri() AS uri,
    random_between(0, 1000) AS page_viewer_counter;

mytest=# select * from website_data limit 10;
 table_storage_id |                          uri                           | page_viewer_counter
------------------+--------------------------------------------------------+---------------------
                1 | /sales/667ba163/ef63ee69/28750da6                      |                 516
                2 | /sales/9614802b/d5b5cc57/150aaa58/1a199920             |                 946
                3 | /customer/a577d741/e0bc954d/c6e0fd20/fead2dbf/6e0f8889 |                 788
                4 | /doc/8863091a/ee194b9d/716852a4/bd3ff291               |                 221
                5 | /doc/8e2df3f4/5d9856b4/eeed2bd2/97b16c45/0c3fc22b      |                 141
                6 | /doc/57068bd4/74eabae3/20a5fb9b                        |                  54
                7 | /customer/811aa406/f293f18b/c9d05b7e                   |                  36
                8 | /doc/7c9f200a/0b20f9eb/f4f3b778/63be4322               |                 398
                9 | /customer/85bd643e/22850ba5/59a57598/b27f3572          |                 509
               10 | /doc/7ec5ef49/757faacc/af257a5a/fe7ea09e               |                 662
(10 rows)

Create different indexes types:


CREATE INDEX idx_uri_btree ON website_data USING btree (uri);
Time: 711.671 ms

CREATE INDEX idx_uri_hash ON website_data USING hash (uri);
Time: 1010.717 ms (00:01.011)

mytest=# create extension btree_gist;
CREATE EXTENSION

CREATE INDEX idx_uri_gist ON website_data USING gist (uri);
Time: 16012.289 ms (00:16.012)

CREATE INDEX idx_uri_spgist ON website_data USING spgist (uri);
Time: 2257.327 ms (00:02.257)

VACUUM ANALYZE;

\di+ idx_uri_*
                                               List of relations
 Schema |      Name      | Type  |  Owner  |    Table     | Persistence | Access method |  Size  | Description
--------+----------------+-------+---------+--------------+-------------+---------------+--------+-------------
 public | idx_uri_btree  | index | donghua | website_data | permanent   | btree         | 64 MB  |
 public | idx_uri_gist   | index | donghua | website_data | permanent   | gist          | 102 MB |
 public | idx_uri_hash   | index | donghua | website_data | permanent   | hash          | 32 MB  |
 public | idx_uri_spgist | index | donghua | website_data | permanent   | spgist        | 66 MB  |


 mytest=# \dt+ website_data;
                                      List of relations
 Schema |     Name     | Type  |  Owner  | Persistence | Access method | Size  | Description
--------+--------------+-------+---------+-------------+---------------+-------+-------------
 public | website_data | table | donghua | permanent   | heap          | 83 MB |
load 'pg_hint_plan';
create extension pg_hint_plan;

Compare execution plans (take 2nd exectuion output to avoid cold cache)

mytest=# explain (analyze,buffers) /*+ SeqScan(website_data) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..16792.43 rows=1 width=55) (actual time=0.236..27.291 rows=1 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   Buffers: shared hit=513 read=10071
   ->  Parallel Seq Scan on website_data  (cost=0.00..15792.33 rows=1 width=55) (actual time=11.457..19.653 rows=0 loops=3)
         Filter: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
         Rows Removed by Filter: 333333
         Buffers: shared hit=513 read=10071
 Planning Time: 0.232 ms
 Execution Time: 27.308 ms
(10 rows)

Time: 27.985 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_btree) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

-----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_btree on website_data  (cost=0.42..8.44 rows=1 width=55) (actual time=0.021..0.022 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=4
 Planning Time: 0.157 ms
 Execution Time: 0.033 ms
(5 rows)

Time: 0.538 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_hash) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_hash on website_data  (cost=0.00..8.02 rows=1 width=55) (actual time=0.021..0.021 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=2
 Planning Time: 0.224 ms
 Execution Time: 0.036 ms
(5 rows)

Time: 2.500 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_gist) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_gist on website_data  (cost=0.41..8.43 rows=1 width=55) (actual time=0.068..0.143 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=21
 Planning Time: 0.149 ms
 Execution Time: 0.157 ms
(5 rows)

Time: 0.652 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_spgist) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                          QUERY PLAN

------------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_spgist on website_data  (cost=0.29..8.30 rows=1 width=55) (actual time=0.083..0.084 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=6
 Planning Time: 0.165 ms
 Execution Time: 0.096 ms
(5 rows)

Time: 0.629 ms
Index TypeStorage Size (MB)Planning TimeExecution Time
BTree640.1570.033
Hash320.2240.036
Gist1020.1490.157
Spgist660.1650.096
No Index-0.23227.308

Saturday, February 22, 2025

Safely drop temporary table without dropping regular table with same name

 In PostgreSQL, it allows to create temporary table with same name as existing regular table, in such case, temporary table has higher "visibility" than regular table.

Once you drop the temporary table, you can see the data in regular table again. But, if you issue drop table twice, then you drop both temporary table and regular table.

How to prevent regular table being dropped? 

Here is a wrapper to safeguard this:

  1. -- Function to drop a temporary table if it exists
  2. CREATE OR REPLACE FUNCTION drop_temp_table_if_exists(table_name text)
  3. RETURNS void AS $$
  4. DECLARE
  5. temp_schema text;
  6. BEGIN
  7. -- Find the temporary schema name for the current session
  8. SELECT nspname INTO temp_schema
  9. FROM pg_namespace
  10. WHERE oid = pg_my_temp_schema();
  11. -- Check if the table exists in the temporary schema
  12. IF EXISTS (
  13. SELECT 1
  14. FROM pg_class c
  15. JOIN pg_namespace n ON c.relnamespace = n.oid
  16. WHERE n.nspname = temp_schema
  17. AND c.relname = table_name
  18. AND c.relkind = 'r' -- 'r' indicates a regular table (in this context, a temp table)
  19. ) THEN
  20. -- Construct the dynamic SQL to drop the table
  21. EXECUTE format('DROP TABLE %I', table_name);
  22. END IF;
  23. END;
  24. $$ LANGUAGE plpgsql;

Demonstration:
  1. mytest=# create table t1 (n text);
  2. CREATE TABLE
  3. Time: 4.322 ms
  4. mytest=# insert into t1 values ('regular');
  5. INSERT 0 1
  6. Time: 3.518 ms
  7. mytest=# create temporary table t1 (n text);
  8. CREATE TABLE
  9. Time: 3.192 ms
  10. mytest=# insert into t1 values ('temporary');
  11. INSERT 0 1
  12. Time: 2.869 ms
  13. mytest=# select * from t1;
  14. n
  15. -----------
  16. temporary
  17. (1 row)
  18. mytest=# SELECT drop_temp_table_if_exists('t1');
  19. drop_temp_table_if_exists
  20. ---------------------------
  21. (1 row)
  22. Time: 5.620 ms
  23. mytest=# select * from t1;
  24. n
  25. ---------
  26. regular
  27. (1 row)
  28. Time: 0.251 ms
  29. mytest=# SELECT drop_temp_table_if_exists('t1');
  30. drop_temp_table_if_exists
  31. ---------------------------
  32. (1 row)
  33. Time: 0.740 ms
  34. mytest=# select * from t1;
  35. n
  36. ---------
  37. regular
  38. (1 row)
  39. Time: 0.410 ms