Sunday, February 23, 2025

Compare different PostgreSQL index types for text column

Prepare test case data

-- Create the table
CREATE TABLE website_data (
    table_storage_id BIGINT,
    uri TEXT,
    page_viewer_counter INTEGER
);

-- Function to generate a random number within a given range
CREATE OR REPLACE FUNCTION random_between(low INT, high INT) RETURNS INT AS $$
BEGIN
    RETURN floor(random() * (high - low + 1) + low)::INT;
END;
$$ LANGUAGE plpgsql;

-- Function to generate a random nested path for the URI
CREATE OR REPLACE FUNCTION generate_random_uri() RETURNS TEXT AS $$
DECLARE
    top_tier TEXT[] := ARRAY['/doc', '/prod', '/sales', '/customer'];
    num_layers INT;
    layer TEXT;
    result_uri TEXT;
    i INT;
BEGIN
    -- Select a random top tier
    result_uri := top_tier[random_between(1, array_length(top_tier, 1))];
    -- Generate a random number of layers between 3 and 5
    num_layers := random_between(3, 5);
    FOR i IN 1..num_layers LOOP
        -- Generate a random layer name (a simple alphanumeric string for example)
        layer := md5(random()::text || clock_timestamp()::text)::text;
        result_uri := result_uri || '/' || left(layer, 8);
    END LOOP;
    RETURN result_uri;
END;
$$ LANGUAGE plpgsql;

-- Insert 1 million rows of dummy data
INSERT INTO website_data (table_storage_id, uri, page_viewer_counter)
SELECT 
    generate_series(1, 1000000) AS table_storage_id,
    generate_random_uri() AS uri,
    random_between(0, 1000) AS page_viewer_counter;

mytest=# select * from website_data limit 10;
 table_storage_id |                          uri                           | page_viewer_counter
------------------+--------------------------------------------------------+---------------------
                1 | /sales/667ba163/ef63ee69/28750da6                      |                 516
                2 | /sales/9614802b/d5b5cc57/150aaa58/1a199920             |                 946
                3 | /customer/a577d741/e0bc954d/c6e0fd20/fead2dbf/6e0f8889 |                 788
                4 | /doc/8863091a/ee194b9d/716852a4/bd3ff291               |                 221
                5 | /doc/8e2df3f4/5d9856b4/eeed2bd2/97b16c45/0c3fc22b      |                 141
                6 | /doc/57068bd4/74eabae3/20a5fb9b                        |                  54
                7 | /customer/811aa406/f293f18b/c9d05b7e                   |                  36
                8 | /doc/7c9f200a/0b20f9eb/f4f3b778/63be4322               |                 398
                9 | /customer/85bd643e/22850ba5/59a57598/b27f3572          |                 509
               10 | /doc/7ec5ef49/757faacc/af257a5a/fe7ea09e               |                 662
(10 rows)

Create different indexes types:


CREATE INDEX idx_uri_btree ON website_data USING btree (uri);
Time: 711.671 ms

CREATE INDEX idx_uri_hash ON website_data USING hash (uri);
Time: 1010.717 ms (00:01.011)

mytest=# create extension btree_gist;
CREATE EXTENSION

CREATE INDEX idx_uri_gist ON website_data USING gist (uri);
Time: 16012.289 ms (00:16.012)

CREATE INDEX idx_uri_spgist ON website_data USING spgist (uri);
Time: 2257.327 ms (00:02.257)

VACUUM ANALYZE;

\di+ idx_uri_*
                                               List of relations
 Schema |      Name      | Type  |  Owner  |    Table     | Persistence | Access method |  Size  | Description
--------+----------------+-------+---------+--------------+-------------+---------------+--------+-------------
 public | idx_uri_btree  | index | donghua | website_data | permanent   | btree         | 64 MB  |
 public | idx_uri_gist   | index | donghua | website_data | permanent   | gist          | 102 MB |
 public | idx_uri_hash   | index | donghua | website_data | permanent   | hash          | 32 MB  |
 public | idx_uri_spgist | index | donghua | website_data | permanent   | spgist        | 66 MB  |


 mytest=# \dt+ website_data;
                                      List of relations
 Schema |     Name     | Type  |  Owner  | Persistence | Access method | Size  | Description
--------+--------------+-------+---------+-------------+---------------+-------+-------------
 public | website_data | table | donghua | permanent   | heap          | 83 MB |
load 'pg_hint_plan';
create extension pg_hint_plan;

Compare execution plans (take 2nd exectuion output to avoid cold cache)

mytest=# explain (analyze,buffers) /*+ SeqScan(website_data) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..16792.43 rows=1 width=55) (actual time=0.236..27.291 rows=1 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   Buffers: shared hit=513 read=10071
   ->  Parallel Seq Scan on website_data  (cost=0.00..15792.33 rows=1 width=55) (actual time=11.457..19.653 rows=0 loops=3)
         Filter: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
         Rows Removed by Filter: 333333
         Buffers: shared hit=513 read=10071
 Planning Time: 0.232 ms
 Execution Time: 27.308 ms
(10 rows)

Time: 27.985 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_btree) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

-----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_btree on website_data  (cost=0.42..8.44 rows=1 width=55) (actual time=0.021..0.022 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=4
 Planning Time: 0.157 ms
 Execution Time: 0.033 ms
(5 rows)

Time: 0.538 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_hash) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_hash on website_data  (cost=0.00..8.02 rows=1 width=55) (actual time=0.021..0.021 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=2
 Planning Time: 0.224 ms
 Execution Time: 0.036 ms
(5 rows)

Time: 2.500 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_gist) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                         QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_gist on website_data  (cost=0.41..8.43 rows=1 width=55) (actual time=0.068..0.143 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=21
 Planning Time: 0.149 ms
 Execution Time: 0.157 ms
(5 rows)

Time: 0.652 ms
mytest=# explain (analyze,buffers) /*+ IndexScan(website_data idx_uri_spgist) */ select * from website_data where uri='/customer/811aa406/f293f18b/c9d05b7e';
                                                          QUERY PLAN

------------------------------------------------------------------------------------------------------------------------------
 Index Scan using idx_uri_spgist on website_data  (cost=0.29..8.30 rows=1 width=55) (actual time=0.083..0.084 rows=1 loops=1)
   Index Cond: (uri = '/customer/811aa406/f293f18b/c9d05b7e'::text)
   Buffers: shared hit=6
 Planning Time: 0.165 ms
 Execution Time: 0.096 ms
(5 rows)

Time: 0.629 ms
Index TypeStorage Size (MB)Planning TimeExecution Time
BTree640.1570.033
Hash320.2240.036
Gist1020.1490.157
Spgist660.1650.096
No Index-0.23227.308

No comments:

Post a Comment