概要
データにNULLが多く含まれる場合のインデックス性能について確認します。
Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production
CentOS Linux release 7.3.1611 (Core)
検証
パターンA NULLデータが全体の1%の場合
create table TAB_NULL_DATA
(
id number(10)
, cdate date
, flg char(2)
, str varchar2(1024)
)
insert into TAB_NULL_DATA nologging
with seq(i) as
(
select 1 from dual
union all
select i+1 from seq where i < 1000000
)
select i
, sysdate
, decode( mod( i, 100 ), 0, null, mod( i, 100 ) )
, rpad( 'a', 1023, 'b' )
from seq
create index IDX1_TAB_NULL_DATA on TAB_NULL_DATA ( flg )
カージナリティとNULL割合の確認
select min(cnt) min_cnt, max(case when flg is not null then cnt end) max_cnt, sum(cnt) sum_cnt
, sum(case when flg is null then cnt else 0 end) null_cnt
from (select
flg
, count(*) cnt
from TAB_NULL_DATA
group by
flg
)
MIN_CNT MAX_CNT SUM_CNT NULL_CNT
10 10 10000000 100000
1 row selected.
Elapsed: 00:00:00.08
Execution Plan
Plan hash value: 2602492627
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
| 0 | SELECT STATEMENT | | 1 | 19 | 15 (7)| 00:00:01 |
| 1 | HASH GROUP BY | | 1 | 19 | 15 (7)| 00:00:01 |
| 2 | TABLE ACCESS BY INDEX ROWID BATCHED| TAB_NULL_DATA | 10 | 190 | 14 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | IDX1_TAB_NULL_DATA | 10 | | 3 (0)| 00:00:01 |
Predicate Information (identified by operation id):
3 - access("FLG"='1')
Statistics
11 recursive calls
11 db block gets
15 consistent gets
12 physical reads
2096 redo size
488 bytes sent via SQL*Net to client
499 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL>
パターンB NULLデータが全体の10%の場合
insert into TAB_NULL_DATA nologging
with seq(i) as
(
select 1 from dual
union all
select i+1 from seq where i < 1000000
)
select i
, sysdate
, decode( mod( i, 10 ), 0, null, mod( i, 100 ) )
, rpad( 'a', 1023, 'b' )
from seq
カージナリティとNULL割合の確認
MIN_CNT MAX_CNT SUM_CNT NULL_CNT
10 10 10000000 1000000
1 row selected.
Elapsed: 00:00:00.07
Execution Plan
Plan hash value: 2602492627
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
| 0 | SELECT STATEMENT | | 1 | 18 | 15 (7)| 00:00:01 |
| 1 | HASH GROUP BY | | 1 | 18 | 15 (7)| 00:00:01 |
| 2 | TABLE ACCESS BY INDEX ROWID BATCHED| TAB_NULL_DATA | 10 | 180 | 14 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | IDX1_TAB_NULL_DATA | 10 | | 3 (0)| 00:00:01 |
Predicate Information (identified by operation id):
3 - access("FLG"='1')
Statistics
1 recursive calls
0 db block gets
13 consistent gets
12 physical reads
0 redo size
488 bytes sent via SQL*Net to client
499 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL>
パターンB NULLデータが全体の50%の場合
insert into TAB_NULL_DATA nologging
with seq(i) as
(
select 1 from dual
union all
select i+1 from seq where i < 1000000
)
select i
, sysdate
, decode( mod( i, 2 ), 0, null, mod( i, 100 ) )
, rpad( 'a', 1023, 'b' )
from seq
カージナリティとNULL割合の確認
MIN_CNT MAX_CNT SUM_CNT NULL_CNT
10 10 10000000 5000000
1 row selected.
Elapsed: 00:00:00.02
Execution Plan
Plan hash value: 2602492627
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
| 0 | SELECT STATEMENT | | 1 | 14 | 15 (7)| 00:00:01 |
| 1 | HASH GROUP BY | | 1 | 14 | 15 (7)| 00:00:01 |
| 2 | TABLE ACCESS BY INDEX ROWID BATCHED| TAB_NULL_DATA | 10 | 140 | 14 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | IDX1_TAB_NULL_DATA | 10 | | 3 (0)| 00:00:01 |
Predicate Information (identified by operation id):
3 - access("FLG"='1')
Statistics
1 recursive calls
0 db block gets
13 consistent gets
12 physical reads
0 redo size
488 bytes sent via SQL*Net to client
499 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL>
パターンB NULLデータが全体の90%の場合
insert into TAB_NULL_DATA nologging
with seq(i) as
(
select 1 from dual
union all
select i+1 from seq where i < 1000000
)
select i
, sysdate
, decode( mod( i, 100 ), 0, mod( i, 100 ), null )
, rpad( 'a', 1023, 'b' )
from seq
カージナリティとNULL割合の確認
MIN_CNT MAX_CNT SUM_CNT NULL_CNT
10 10 10000000 9000000
1 row selected.
Elapsed: 00:00:00.07
Execution Plan
Plan hash value: 2602492627
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
| 0 | SELECT STATEMENT | | 1 | 10 | 15 (7)| 00:00:01 |
| 1 | HASH GROUP BY | | 1 | 10 | 15 (7)| 00:00:01 |
| 2 | TABLE ACCESS BY INDEX ROWID BATCHED| TAB_NULL_DATA | 10 | 100 | 14 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | IDX1_TAB_NULL_DATA | 10 | | 3 (0)| 00:00:01 |
Predicate Information (identified by operation id):
3 - access("FLG"='0')
Statistics
1 recursive calls
0 db block gets
13 consistent gets
10 physical reads
0 redo size
488 bytes sent via SQL*Net to client
499 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL>
と言う事で、NULLデータの割合は性能に影響しませんでした。