When Oracle gather table statistics, it gathered information about column statistics, including low value and high value for that column, and estimate matching records based on uniform distribution or histograms data.
What is the behavior for Oracle optimizer to estimate matching rows for values beyond known boundary?
Setup the demo table
create table t (d date, v int);
create index t_n1 on t(d);
Populate the table with data (2022-03-01 to 2022-03-31, with 10,000 rows per day)
insert into t
with
v_date as (
select date '2022-02-28' + level d
from dual connect by level <= 31
),
v_data as (
select level v from dual connect by level <=10000
)
select d, v from v_date, v_data;
commit;
Gather Table Statistics
exec dbms_stats.gather_table_stats('','T');
Populate the table with data (2022-04-01 to 2022-04-07, with 10,000 rows per day)
Remember don't gather the statistics for this DML operation.
insert into t
with
v_date as (
select date '2022-03-31' + level d
from dual connect by level <= 7
),
v_data as (
select level v from dual connect by level <=10000
)
select d, v from v_date, v_data;
commit;
Verify the data populated
select d,count(*) from t group by d order by d;
D | COUNT |
---|---|
2022-MAR-01 00:00:00 | 10000 |
2022-MAR-02 00:00:00 | 10000 |
2022-MAR-03 00:00:00 | 10000 |
2022-MAR-04 00:00:00 | 10000 |
2022-MAR-05 00:00:00 | 10000 |
2022-MAR-06 00:00:00 | 10000 |
2022-MAR-07 00:00:00 | 10000 |
2022-MAR-08 00:00:00 | 10000 |
2022-MAR-09 00:00:00 | 10000 |
2022-MAR-10 00:00:00 | 10000 |
2022-MAR-11 00:00:00 | 10000 |
2022-MAR-12 00:00:00 | 10000 |
2022-MAR-13 00:00:00 | 10000 |
2022-MAR-14 00:00:00 | 10000 |
2022-MAR-15 00:00:00 | 10000 |
2022-MAR-16 00:00:00 | 10000 |
2022-MAR-17 00:00:00 | 10000 |
2022-MAR-18 00:00:00 | 10000 |
2022-MAR-19 00:00:00 | 10000 |
2022-MAR-20 00:00:00 | 10000 |
2022-MAR-21 00:00:00 | 10000 |
2022-MAR-22 00:00:00 | 10000 |
2022-MAR-23 00:00:00 | 10000 |
2022-MAR-24 00:00:00 | 10000 |
2022-MAR-25 00:00:00 | 10000 |
2022-MAR-26 00:00:00 | 10000 |
2022-MAR-27 00:00:00 | 10000 |
2022-MAR-28 00:00:00 | 10000 |
2022-MAR-29 00:00:00 | 10000 |
2022-MAR-30 00:00:00 | 10000 |
2022-MAR-31 00:00:00 | 10000 |
2022-APR-01 00:00:00 | 10000 |
2022-APR-02 00:00:00 | 10000 |
2022-APR-03 00:00:00 | 10000 |
2022-APR-04 00:00:00 | 10000 |
2022-APR-05 00:00:00 | 10000 |
2022-APR-06 00:00:00 | 10000 |
2022-APR-07 00:00:00 | 10000 |
Verify Statistics Gathered
select low_value,high_value
from user_tab_columns
where table_name='T' and column_name='D';
SQL> select low_value,high_value
2 from user_tab_columns
3* where table_name='T' and column_name='D';
LOW_VALUE HIGH_VALUE
_________________ _________________
787A0301010101 787A031F010101
set serveroutput on
declare
low_value date;
high_value date;
begin
dbms_stats.convert_raw_value('787A0301010101',low_value);
dbms_stats.convert_raw_value('787A031F010101',high_value);
dbms_output.put_line('low_value:'||to_char(low_value,'YYYY-MON-DD'));
dbms_output.put_line('high_value:'||to_char(high_value,'YYYY-MON-DD'));
end;
/
SQL> set serveroutput on
SQL> declare
2 low_value date;
3 high_value date;
4 begin
5 dbms_stats.convert_raw_value('787A0301010101',low_value);
6 dbms_stats.convert_raw_value('787A031F010101',high_value);
7 dbms_output.put_line('low_value: '||to_char(low_value,'YYYY-MON-DD'));
8 dbms_output.put_line('high_value: '||to_char(high_value,'YYYY-MON-DD'));
9 end;
10* /
low_value: 2022-MAR-01
high_value: 2022-MAR-31
PL/SQL procedure successfully completed.
Now build test cases with different "D" values
Test 1: D is known value within the range (D = '2022-Mar-15');
select /*+ GATHER_PLAN_STATISTICS */ count(*)
from t where d=to_date('2022-mar-15','yyyy-mon-dd');
SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
SQL> select /*+ GATHER_PLAN_STATISTICS */ count(*)
2* from t where d=to_date('2022-mar-15','yyyy-mon-dd');
COUNT(*)
___________
10000
SQL> SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
PLAN_TABLE_OUTPUT
_______________________________________________________________________________________
SQL_ID 4f7917bkzugpw, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from t where
d=to_date('2022-mar-15','yyyy-mon-dd')
Plan hash value: 3395180883
------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 41 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 41 |
|* 2 | INDEX RANGE SCAN| T_N1 | 1 | 10000 | 10000 |00:00:00.01 | 41 |
------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("D"=TO_DATE(' 2022-03-15 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
20 rows selected.
Test 2: D is known value at the range boundary (D = '2022-Mar-31');
select /*+ GATHER_PLAN_STATISTICS */ count(*)
from t where d=to_date('2022-mar-31','yyyy-mon-dd');
SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
SQL> select /*+ GATHER_PLAN_STATISTICS */ count(*)
2* from t where d=to_date('2022-mar-31','yyyy-mon-dd');
COUNT(*)
___________
10000
SQL> SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
PLAN_TABLE_OUTPUT
_______________________________________________________________________________________
SQL_ID 1wwkh3s9gshs9, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from t where
d=to_date('2022-mar-31','yyyy-mon-dd')
Plan hash value: 3395180883
------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 42 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 42 |
|* 2 | INDEX RANGE SCAN| T_N1 | 1 | 10000 | 10000 |00:00:00.01 | 42 |
------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("D"=TO_DATE(' 2022-03-31 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
20 rows selected.
Test 3: D is unknown value outside range boundary (D = '2022-Apr-01');
select /*+ GATHER_PLAN_STATISTICS */ count(*)
from t where d=to_date('2022-apr-01','yyyy-mon-dd');
SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
SQL> select /*+ GATHER_PLAN_STATISTICS */ count(*)
2* from t where d=to_date('2022-apr-01','yyyy-mon-dd');
COUNT(*)
___________
10000
SQL> SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
PLAN_TABLE_OUTPUT
_______________________________________________________________________________________
SQL_ID cnwqc5sxus6s4, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from t where
d=to_date('2022-apr-01','yyyy-mon-dd')
Plan hash value: 3395180883
------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 68 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 68 |
|* 2 | INDEX RANGE SCAN| T_N1 | 1 | 9667 | 10000 |00:00:00.01 | 68 |
------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("D"=TO_DATE(' 2022-04-01 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
20 rows selected.
Test 4: D is unknown value outside range boundary further (D = '2022-Apr-07');
select /*+ GATHER_PLAN_STATISTICS */ count(*)
from t where d=to_date('2022-apr-07','yyyy-mon-dd');
SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
SQL> select /*+ GATHER_PLAN_STATISTICS */ count(*)
2* from t where d=to_date('2022-apr-07','yyyy-mon-dd');
COUNT(*)
___________
10000
SQL> SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
PLAN_TABLE_OUTPUT
_______________________________________________________________________________________
SQL_ID fyyh5k7x6g7kr, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from t where
d=to_date('2022-apr-07','yyyy-mon-dd')
Plan hash value: 3395180883
------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 32 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 32 |
|* 2 | INDEX RANGE SCAN| T_N1 | 1 | 7667 | 10000 |00:00:00.01 | 32 |
------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("D"=TO_DATE(' 2022-04-07 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
20 rows selected.
Test 5: D is unknown value outside range boundary even further (D = '2022-Apr-30');
select /*+ GATHER_PLAN_STATISTICS */ count(*)
from t where d=to_date('2022-apr-30','yyyy-mon-dd');
SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
SQL> select /*+ GATHER_PLAN_STATISTICS */ count(*)
2* from t where d=to_date('2022-apr-30','yyyy-mon-dd');
COUNT(*)
___________
0
SQL> SELECT * FROM TABLE(DBMS_XPLAN.display_cursor(format=>'ALLSTATS LAST'));
PLAN_TABLE_OUTPUT
_______________________________________________________________________________________
SQL_ID 4f0nk1mjax1tu, child number 0
-------------------------------------
select /*+ GATHER_PLAN_STATISTICS */ count(*) from t where
d=to_date('2022-apr-30','yyyy-mon-dd')
Plan hash value: 3395180883
------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 3 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 3 |
|* 2 | INDEX RANGE SCAN| T_N1 | 1 | 1 | 0 |00:00:00.01 | 3 |
------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("D"=TO_DATE(' 2022-04-30 00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
20 rows selected.
Observation:
The amount of estimated matching rows reduced granularly for values outside the low_value and high_value boundary.
Test | Value | E-Rows | A-Rows |
---|---|---|---|
Test 1 | 2022-Mar-15 | 10000 | 10000 |
Test 2 | 2022-Mar-31 | 10000 | 10000 |
Test 3 | 2022-Apr-1 | 9667 | 10000 |
Test 4 | 2022-Apr-7 | 7667 | 10000 |
Test 5 | 2022-Apr-30 | 1 | 0 |
No comments:
Post a Comment