Greenplum Database: Greenlpum 파티션 컬럼 데이터 타입 선정

파티션 스캔을 다양한 형태 지원하기 위해서는 파티션 컬럼에 char 타입 보다는 varchar 타입을 사용합니다. 고민하지말고 파티션 / 인덱스 컬럼에는 char 보다는 varchar를 사용하세요..^^;

테스트 요약

1. 파티션 key를 char로 한 경우

   1) set optimizer = on

       dt = '20200101'  --파티션 스캔 

       dt = substr('20200101', 1, 8) -- 파티션 스캔  

    2) set optimzer = off

       dt = '20200101'  --파티션 스캔 

       dt = substr('20200101', 1, 8) -- 파티션 스캔 안됨, 월 파티션의 경우에는 

                                        substr(xxxxxxxx, 1, 6) 으로 들어올 가능성도 있음.

2. 파티션 key를 varchar로 한 경우

   1) set optimizer = on

       dt = '20200101'  --파티션 스캔 

       dt = substr('20200101', 1, 8) -- 파티션 스캔  

    2) set optimzer = off

       dt = '20200101'  --파티션 스캔 

       dt = substr('20200101', 1, 8) -- 파티션 스캔 

테스트 결과

CHAR 테스트

CREATE TABLE public.tab_char (

col1 int4 NOT NULL DEFAULT 0,

col2 bpchar(20) NULL,

col3 timestamp NOT NULL,

dt   char(8) 

)

DISTRIBUTED BY (col1)

PARTITION BY RANGE(dt) 

          (

          partition p20200101 start ('20200101'::bpchar) end ('20200102'::bpchar), 

          partition p20200102 start ('20200102'::bpchar) end ('20200103'::bpchar), 

          partition p20200103 start ('20200103'::bpchar) end ('20200104'::bpchar), 

          partition p20200104 start ('20200104'::bpchar) end ('20200105'::bpchar), 

          DEFAULT PARTITION pother

          );

set optimizer=on;

explain analyze          

select * from public.tab_char

where dt = '20200101'

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..431.00 rows=1 width=40) (actual time=1.213..1.213 rows=0 loops=1)

  ->  Sequence  (cost=0.00..431.00 rows=1 width=40) (never executed)

        ->  Partition Selector for tab_char (dynamic scan id: 1)  (cost=10.00..100.00 rows=25 width=4) (never executed)

              Partitions selected: 1 (out of 5)

        ->  Dynamic Seq Scan on tab_char (dynamic scan id: 1)  (cost=0.00..431.00 rows=1 width=40) (never executed)

              Filter: (dt = '20200101'::bpchar)

              Partitions scanned:  Avg 1.0 (out of 5) x 4 workers.  Max 1 parts (seg0).

explain analyze          

select * from public.tab_char

where dt = substr('20200101', 1,8)

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..431.00 rows=1 width=40) (actual time=2.712..2.712 rows=0 loops=1)

  ->  Sequence  (cost=0.00..431.00 rows=1 width=40) (never executed)

        ->  Partition Selector for tab_char (dynamic scan id: 1)  (cost=10.00..100.00 rows=25 width=4) (never executed)

              Partitions selected: 2 (out of 5)

        ->  Dynamic Seq Scan on tab_char (dynamic scan id: 1)  (cost=0.00..431.00 rows=1 width=40) (never executed)

              Filter: ((dt)::text = '20200101'::text)

              Partitions scanned:  Avg 2.0 (out of 5) x 4 workers.  Max 2 parts (seg0).

set optimizer=off;

explain analyze          

select * from public.tab_char

where dt = '20200101'

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..705.00 rows=41 width=132) (actual time=3.096..3.096 rows=0 loops=1)

  ->  Append  (cost=0.00..705.00 rows=11 width=132) (never executed)

        ->  Seq Scan on tab_char_1_prt_pother  (cost=0.00..352.50 rows=6 width=132) (never executed)

              Filter: (dt = '20200101'::bpchar)

        ->  Seq Scan on tab_char_1_prt_p20200101  (cost=0.00..352.50 rows=6 width=132) (never executed)

              Filter: (dt = '20200101'::bpchar)

explain analyze          

select * from public.tab_char

where dt = substr('20200101', 1,8)==> 파티션 스캔 안됨, 월 파티션일 경우 이런 경우 많을 것으로 예상

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..2015.00 rows=101 width=132) (actual time=1.771..1.771 rows=0 loops=1)

  ->  Append  (cost=0.00..2015.00 rows=26 width=132) (never executed)

        ->  Seq Scan on tab_char_1_prt_pother  (cost=0.00..403.00 rows=6 width=132) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_char_1_prt_p20200101  (cost=0.00..403.00 rows=6 width=132) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_char_1_prt_p20200102  (cost=0.00..403.00 rows=6 width=132) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_char_1_prt_p20200103  (cost=0.00..403.00 rows=6 width=132) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_char_1_prt_p20200104  (cost=0.00..403.00 rows=6 width=132) (never executed)

              Filter: ((dt)::text = '20200101'::text)

Planning time: 0.297 ms

VARCHAR 테스트

CREATE TABLE public.tab_varchar (

col1 int4 NOT NULL DEFAULT 0,

col2 bpchar(20) NULL,

col3 timestamp NOT NULL,

dt   varchar(8) 

)

DISTRIBUTED BY (col1)

PARTITION BY RANGE(dt) 

(

          partition p20200101 start ('20200101') end ('20200102'), 

          partition p20200102 start ('20200102') end ('20200103'), 

          partition p20200103 start ('20200103') end ('20200104'), 

          partition p20200104 start ('20200104') end ('20200105'), 

          DEFAULT PARTITION pother

);

set optimizer=on;

explain analyze          

select * from public.tab_varchar

where dt = '20200101'

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..431.00 rows=1 width=40) (actual time=2.753..2.753 rows=0 loops=1)

  ->  Sequence  (cost=0.00..431.00 rows=1 width=40) (never executed)

        ->  Partition Selector for tab_varchar (dynamic scan id: 1)  (cost=10.00..100.00 rows=25 width=4) (never executed)

              Partitions selected: 1 (out of 5)

        ->  Dynamic Seq Scan on tab_varchar (dynamic scan id: 1)  (cost=0.00..431.00 rows=1 width=40) (never executed)

              Filter: ((dt)::text = '20200101'::text)

              Partitions scanned:  Avg 1.0 (out of 5) x 4 workers.  Max 1 parts (seg0).

explain analyze          

select * from public.tab_varchar

where dt = substr('20200101', 1,8)

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..431.00 rows=1 width=40) (actual time=2.305..2.305 rows=0 loops=1)

  ->  Sequence  (cost=0.00..431.00 rows=1 width=40) (never executed)

        ->  Partition Selector for tab_varchar (dynamic scan id: 1)  (cost=10.00..100.00 rows=25 width=4) (never executed)

              Partitions selected: 1 (out of 5)

        ->  Dynamic Seq Scan on tab_varchar (dynamic scan id: 1)  (cost=0.00..431.00 rows=1 width=40) (never executed)

              Filter: ((dt)::text = '20200101'::text)

              Partitions scanned:  Avg 1.0 (out of 5) x 4 workers.  Max 1 parts (seg0).

set optimizer=off;

explain analyze          

select * from public.tab_varchar

where dt = '20200101'

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..710.00 rows=41 width=130) (actual time=2.071..2.071 rows=0 loops=1)

  ->  Append  (cost=0.00..710.00 rows=11 width=130) (never executed)

        ->  Seq Scan on tab_varchar_1_prt_pother  (cost=0.00..355.00 rows=6 width=130) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_varchar_1_prt_p20200101  (cost=0.00..355.00 rows=6 width=130) (never executed)

              Filter: ((dt)::text = '20200101'::text)

Planning time: 0.459 ms

explain analyze          

select * from public.tab_varchar

where dt = substr('20200101', 1,8)

Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..710.00 rows=41 width=130) (actual time=1.252..1.252 rows=0 loops=1)

  ->  Append  (cost=0.00..710.00 rows=11 width=130) (never executed)

        ->  Seq Scan on tab_varchar_1_prt_pother  (cost=0.00..355.00 rows=6 width=130) (never executed)

              Filter: ((dt)::text = '20200101'::text)

        ->  Seq Scan on tab_varchar_1_prt_p20200101  (cost=0.00..355.00 rows=6 width=130) (never executed)

              Filter: ((dt)::text = '20200101'::text)

Planning time: 0.366 ms

#########################

varchar로 every 로 파티션 생성 방법

CREATE FUNCTION pg_catalog.textplusint(text, integer) returns text STRICT IMMUTABLE LANGUAGE SQL AS $$SELECT ($2+$1::int)::text;$$;

CREATE OPERATOR pg_catalog.+ ( PROCEDURE = pg_catalog.textplusint, LEFTARG=text, RIGHTARG=integer, COMMUTATOR=OPERATOR(pg_catalog.+));

CREATE TABLE public.tab1

(

  col1 int  not null default 0 ,

  col2 char(20) ,

  col3 timestamp not null,

  dt   varchar(8)

)

distributed by (col1) 

partition by range(dt)

(

   start (100) inclusive end (110) EXCLUSIVE every (1), 

   start (200) inclusive end (210) EXCLUSIVE every (1),

   default partition pother

);

실행 후 DDL export 한 결과

CREATE TABLE public.tab1 (

col1 int4 NOT NULL DEFAULT 0,

col2 bpchar(20) NULL,

col3 timestamp NOT NULL,

dt varchar(8) NULL

)

DISTRIBUTED BY (col1)

PARTITION BY RANGE(dt) 

(

          START ('100'::character varying(8)) END ('110'::character varying(8)) EVERY (1), 

          START ('200'::character varying(8)) END ('210'::character varying(8)) EVERY (1), 

          DEFAULT PARTITION pother

);

explain analyze          

select * from public.tab1 where dt = substr('101111', 1, 3)=====> 파티션 스캔 잘 됨.

Gather Motion 4:1 (slice1; segments: 4) (cost=0.00..710.00 rows=41 width=130) (actual time=1.247..1.247 rows=0 loops=1)

-> Append (cost=0.00..710.00 rows=11 width=130) (never executed)

-> Seq Scan on tab1_1_prt_pother (cost=0.00..355.00 rows=6 width=130) (never executed)

Filter: ((dt)::text = '101'::text)

-> Seq Scan on tab1_1_prt_3 (cost=0.00..355.00 rows=6 width=130) (never executed)

Filter: ((dt)::text = '101'::text

Greenplum Database

2020년 7월 15일 수요일

Greenlpum 파티션 컬럼 데이터 타입 선정

댓글 없음:

댓글 쓰기

Greenplum 7.5에서 Analyze 기능개선

블로그 보관함

태그