Overview
We will compare "IN", "EXISTS", "JOIN", "NOT IN", "NOT EXISTS" and "OUTER JOIN" between Oracle 21c and PostgresQL 15.
The execution summary is as below:
QueryID | Query | Oracle | PostgreSQL |
---|---|---|---|
Query 1: "IN" | select sum(v) from c where pid in (select pid from p where v>0.95); | 00:00:00.01 | 76.142 ms |
Query 2: "EXISTS" | select sum(v) from c where exists (select pid from p where c.pid = p.pid and v>0.95); | Same plan as query 1 | Same plan as query 1 |
Query 3: "JOIN" | select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95; | Same plan as query 1 | Same plan as query 1 |
Query 4: "NOT IN" | select sum(v) from c where pid not in (select pid from p where v<=0.95); | 00:00:00.01 | 112.662 ms |
Query 5: "NOT EXISTS" | select sum(v) from c where not exists (select pid from p where c.pid = p.pid and v<=0.95); | Same plan as query 4 | 103.857 ms |
Query 6: "OUTER JOIN" | select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2 on c.pid=p2.pid where p2.pid is null; | 00:00:00.02 | Same plan as query 5 |
Setup before testing
Oracle
-- setup demo table
drop table c;
drop table p;
create table p(pid int primary key, v number(3,2), extra char(100));
create table c(cid int primary key, pid int, v number(3,2), extra char(100),
constraint c_p_fk foreign key (pid) references p(pid));
insert into p select level, round(dbms_random.value,2), 'x'
from dual connect by level <= 1000;
insert into c select level, floor(dbms_random.value(1,1000)),round(dbms_random.value,2), 'x'
from dual connect by level <= 100000;
commit;
create index idx_p_v on p(v);
create index idx_c_pid on c(pid);
exec dbms_stats.gather_table_stats('','P');
exec dbms_stats.gather_table_stats('','C');
-- formating related
set timing on;
set autotrace on explain
set linesize 120
set tab off
PostgreSQL
-- setup demo table
drop table c;
drop table p;
create table p(pid int primary key, v numeric(3,2), extra char(100));
create table c(cid int primary key, pid int, v numeric(3,2), extra char(100),
constraint c_p_fk foreign key (pid) references p(pid));
insert into p select level, round(random()::numeric,2), 'x'
from generate_series(1,1000) level;
insert into c select level, floor(random()::numeric*1000)+1,round(random()::numeric,2), 'x'
from generate_series(1,1000000) level;
create index idx_p_v on p(v);
create index idx_c_pid on c(pid);
vacuum analyze p;
vacuum analyze c;
-- formating related
\timing
Database Version
SQL> select banner_full from v$version;
BANNER_FULL
----------------------------------------------------------------------
Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.9.0.0.0
mytest=# select version();
version
---------------------------------------------------------------------------------------------------------
PostgreSQL 15.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-16), 64-bit
(1 row)
Query 1: "IN"
select sum(v) from c
where pid in (select pid from p where v>0.95);
Oracle
SQL> select sum(v) from c
2 where pid in (select pid from p where v>0.95);
SUM(V)
----------
2484.1
Elapsed: 00:00:00.01
Execution Plan
----------------------------------------------------------
Plan hash value: 423065292
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 484 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | HASH JOIN | | 5005 | 70070 | 484 (1)| 00:00:01 |
|* 3 | VIEW | index$_join$_002 | 50 | 350 | 5 (0)| 00:00:01 |
|* 4 | HASH JOIN | | | | | |
|* 5 | INDEX RANGE SCAN | IDX_P_V | 50 | 350 | 2 (0)| 00:00:01 |
| 6 | INDEX FAST FULL SCAN| SYS_C008978 | 50 | 350 | 4 (0)| 00:00:01 |
| 7 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("PID"="PID")
3 - filter("V">0.95)
4 - access(ROWID=ROWID)
5 - access("V">0.95)
Note
-----
- this is an adaptive plan
PostgreSQL
mytest=# select sum(v) from c
mytest-# where pid in (select pid from p where v>0.95);
sum
----------
24233.38
(1 row)
Time: 76.142 ms
mytest=# explain
mytest-# select sum(v) from c
mytest-# where pid in (select pid from p where v>0.95);
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=24522.20..24522.21 rows=1 width=32)
-> Gather (cost=24521.98..24522.19 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=23521.98..23521.99 rows=1 width=32)
-> Hash Join (cost=23.88..23470.94 rows=20417 width=4)
Hash Cond: (c.pid = p.pid)
-> Parallel Seq Scan on c (cost=0.00..22348.67 rows=416667 width=8)
-> Hash (cost=23.27..23.27 rows=49 width=4)
-> Bitmap Heap Scan on p (cost=4.65..23.27 rows=49 width=4)
Recheck Cond: (v > 0.95)
-> Bitmap Index Scan on idx_p_v (cost=0.00..4.64 rows=49 width=0)
Index Cond: (v > 0.95)
(12 rows)
Time: 0.715 ms
Query 2: "EXISTS"
select sum(v) from c
where exists (select pid from p where c.pid = p.pid and v>0.95);
Oracle
SQL> select sum(v) from c
2 where exists (select pid from p where c.pid = p.pid and v>0.95);
SUM(V)
----------
2484.1
Elapsed: 00:00:00.01
Execution Plan
----------------------------------------------------------
Plan hash value: 423065292
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 484 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | HASH JOIN | | 5005 | 70070 | 484 (1)| 00:00:01 |
|* 3 | VIEW | index$_join$_002 | 50 | 350 | 5 (0)| 00:00:01 |
|* 4 | HASH JOIN | | | | | |
|* 5 | INDEX RANGE SCAN | IDX_P_V | 50 | 350 | 2 (0)| 00:00:01 |
| 6 | INDEX FAST FULL SCAN| SYS_C008978 | 50 | 350 | 4 (0)| 00:00:01 |
| 7 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C"."PID"="P"."PID")
3 - filter("V">0.95)
4 - access(ROWID=ROWID)
5 - access("V">0.95)
Note
-----
- this is an adaptive plan
PostgreSQL
mytest=# select sum(v) from c
mytest-# where exists (select pid from p where c.pid = p.pid and v>0.95);
sum
----------
24233.38
(1 row)
Time: 76.109 ms
mytest=# explain
mytest-# select sum(v) from c
mytest-# where exists (select pid from p where c.pid = p.pid and v>0.95);
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=24522.20..24522.21 rows=1 width=32)
-> Gather (cost=24521.98..24522.19 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=23521.98..23521.99 rows=1 width=32)
-> Hash Join (cost=23.88..23470.94 rows=20417 width=4)
Hash Cond: (c.pid = p.pid)
-> Parallel Seq Scan on c (cost=0.00..22348.67 rows=416667 width=8)
-> Hash (cost=23.27..23.27 rows=49 width=4)
-> Bitmap Heap Scan on p (cost=4.65..23.27 rows=49 width=4)
Recheck Cond: (v > 0.95)
-> Bitmap Index Scan on idx_p_v (cost=0.00..4.64 rows=49 width=0)
Index Cond: (v > 0.95)
(12 rows)
Time: 0.723 ms
Query 3: "JOIN"
select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
Oracle
SQL> select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
SUM(C.V)
----------
2484.1
Elapsed: 00:00:00.01
Execution Plan
----------------------------------------------------------
Plan hash value: 423065292
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 484 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | HASH JOIN | | 5005 | 70070 | 484 (1)| 00:00:01 |
|* 3 | VIEW | index$_join$_002 | 50 | 350 | 5 (0)| 00:00:01 |
|* 4 | HASH JOIN | | | | | |
|* 5 | INDEX RANGE SCAN | IDX_P_V | 50 | 350 | 2 (0)| 00:00:01 |
| 6 | INDEX FAST FULL SCAN| SYS_C008978 | 50 | 350 | 4 (0)| 00:00:01 |
| 7 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C"."PID"="P"."PID")
3 - filter("P"."V">0.95)
4 - access(ROWID=ROWID)
5 - access("P"."V">0.95)
Note
-----
- this is an adaptive plan
PostgreSQL
mytest=# select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
sum
----------
24233.38
(1 row)
Time: 75.596 ms
mytest=# explain
mytest-# select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=24522.20..24522.21 rows=1 width=32)
-> Gather (cost=24521.98..24522.19 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=23521.98..23521.99 rows=1 width=32)
-> Hash Join (cost=23.88..23470.94 rows=20417 width=4)
Hash Cond: (c.pid = p.pid)
-> Parallel Seq Scan on c (cost=0.00..22348.67 rows=416667 width=8)
-> Hash (cost=23.27..23.27 rows=49 width=4)
-> Bitmap Heap Scan on p (cost=4.65..23.27 rows=49 width=4)
Recheck Cond: (v > 0.95)
-> Bitmap Index Scan on idx_p_v (cost=0.00..4.64 rows=49 width=0)
Index Cond: (v > 0.95)
(12 rows)
Time: 0.707 ms
Query 4: "NOT IN"
select sum(v) from c
where pid not in (select pid from p where v<=0.95);
Oracle
SQL> select sum(v) from c
2 where pid not in (select pid from p where v<=0.95);
SUM(V)
----------
2484.1
Elapsed: 00:00:00.01
Execution Plan
----------------------------------------------------------
Plan hash value: 257737780
----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 485 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | HASH JOIN RIGHT ANTI SNA| | 4905 | 68670 | 485 (1)| 00:00:01 |
|* 3 | VIEW | index$_join$_002 | 950 | 6650 | 6 (0)| 00:00:01 |
|* 4 | HASH JOIN | | | | | |
|* 5 | INDEX RANGE SCAN | IDX_P_V | 950 | 6650 | 3 (0)| 00:00:01 |
| 6 | INDEX FAST FULL SCAN | SYS_C008978 | 950 | 6650 | 4 (0)| 00:00:01 |
| 7 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("PID"="PID")
3 - filter("V"<=0.95)
4 - access(ROWID=ROWID)
5 - access("V"<=0.95)
PostgreSQL
mytest=# select sum(v) from c
mytest-# where pid not in (select pid from p where v<=0.95);
sum
----------
24233.38
(1 row)
mytest=# explain
mytest-# select sum(v) from c
mytest-# where pid not in (select pid from p where v<=0.95);
QUERY PLAN
--------------------------------------------------------------------------------------
Finalize Aggregate (cost=24944.27..24944.28 rows=1 width=32)
-> Gather (cost=24944.05..24944.26 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=23944.05..23944.06 rows=1 width=32)
-> Parallel Seq Scan on c (cost=32.88..23423.21 rows=208333 width=4)
Filter: (NOT (hashed SubPlan 1))
SubPlan 1
-> Seq Scan on p (cost=0.00..30.50 rows=951 width=4)
Filter: (v <= 0.95)
(9 rows)
Time: 0.493 ms
Query 5: "NOT EXISTS"
select sum(v) from c
where not exists (select pid from p where c.pid = p.pid and v<=0.95);
Oracle
SQL> select sum(v) from c
2 where not exists (select pid from p where c.pid = p.pid and v<=0.95);
SUM(V)
----------
2484.1
Elapsed: 00:00:00.02
Execution Plan
----------------------------------------------------------
Plan hash value: 3094349839
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 485 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | HASH JOIN RIGHT ANTI | | 4905 | 68670 | 485 (1)| 00:00:01 |
|* 3 | VIEW | index$_join$_002 | 950 | 6650 | 6 (0)| 00:00:01 |
|* 4 | HASH JOIN | | | | | |
|* 5 | INDEX RANGE SCAN | IDX_P_V | 950 | 6650 | 3 (0)| 00:00:01 |
| 6 | INDEX FAST FULL SCAN| SYS_C008978 | 950 | 6650 | 4 (0)| 00:00:01 |
| 7 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C"."PID"="P"."PID")
3 - filter("V"<=0.95)
4 - access(ROWID=ROWID)
5 - access("V"<=0.95)
PostgreSQL
mytest=# select sum(v) from c
mytest-# where not exists (select pid from p where c.pid = p.pid and v<=0.95);
sum
----------
24233.38
(1 row)
Time: 103.857 ms
mytest=# explain
mytest-# select sum(v) from c
mytest-# where not exists (select pid from p where c.pid = p.pid and v<=0.95);
QUERY PLAN
-------------------------------------------------------------------------------------------
Finalize Aggregate (cost=25186.02..25186.03 rows=1 width=32)
-> Gather (cost=25185.80..25186.01 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=24185.80..24185.81 rows=1 width=32)
-> Hash Anti Join (cost=42.39..24134.76 rows=20417 width=4)
Hash Cond: (c.pid = p.pid)
-> Parallel Seq Scan on c (cost=0.00..22348.67 rows=416667 width=8)
-> Hash (cost=30.50..30.50 rows=951 width=4)
-> Seq Scan on p (cost=0.00..30.50 rows=951 width=4)
Filter: (v <= 0.95)
(10 rows)
Time: 0.751 ms
Query 6: "OUTER JOIN""
select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
on c.pid=p2.pid where p2.pid is null;
Oracle
SQL> select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
2 on c.pid=p2.pid where p2.pid is null;
SUM(C.V)
----------
2484.1
Elapsed: 00:00:00.02
Execution Plan
----------------------------------------------------------
Plan hash value: 3112675040
----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 485 (1)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
|* 2 | FILTER | | | | | |
|* 3 | HASH JOIN RIGHT OUTER | | 4905 | 68670 | 485 (1)| 00:00:01 |
|* 4 | VIEW | index$_join$_003 | 950 | 6650 | 6 (0)| 00:00:01 |
|* 5 | HASH JOIN | | | | | |
|* 6 | INDEX RANGE SCAN | IDX_P_V | 950 | 6650 | 3 (0)| 00:00:01 |
| 7 | INDEX FAST FULL SCAN| SYS_C008978 | 950 | 6650 | 4 (0)| 00:00:01 |
| 8 | TABLE ACCESS FULL | C | 100K| 683K| 479 (1)| 00:00:01 |
----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("P"."PID" IS NULL)
3 - access("C"."PID"="P"."PID"(+))
4 - filter("P"."V"(+)<=0.95)
5 - access(ROWID=ROWID)
6 - access("P"."V"<=0.95)
PostgreSQL
mytest=# select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
mytest-# on c.pid=p2.pid where p2.pid is null;
sum
----------
24233.38
(1 row)
Time: 104.988 ms
mytest=# explain
mytest-# select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
mytest-# on c.pid=p2.pid where p2.pid is null;
QUERY PLAN
-------------------------------------------------------------------------------------------
Finalize Aggregate (cost=25186.02..25186.03 rows=1 width=32)
-> Gather (cost=25185.80..25186.01 rows=2 width=32)
Workers Planned: 2
-> Partial Aggregate (cost=24185.80..24185.81 rows=1 width=32)
-> Hash Anti Join (cost=42.39..24134.76 rows=20417 width=4)
Hash Cond: (c.pid = p.pid)
-> Parallel Seq Scan on c (cost=0.00..22348.67 rows=416667 width=8)
-> Hash (cost=30.50..30.50 rows=951 width=4)
-> Seq Scan on p (cost=0.00..30.50 rows=951 width=4)
Filter: (v <= 0.95)
(10 rows)
Time: 0.844 ms