2017년 8월 27일 일요일

GPDB transpose

Greenplum Database Transpose

1. row_data 생성
drop TABLE if exists public.row_data;
CREATE TABLE public.row_data
(
  param varchar(100),
  g_id varchar(100),
  val   numeric
)
WITH (APPENDONLY=true, compresslevel=5)
DISTRIBUTED BY (param);
truncate table public.row_data;
insert into public.row_data
select 'param_'||trim(to_char(a, '000000')) param
     , 'gid_'||trim(to_char(b, '00')) g_id
     , round((random() * 10)::numeric, 5) val
from   generate_series(1, 100) a
     , generate_series(1, 50) b
;
2.1 case when transpose
select  param
       , sum(case when g_id = 'gid_01' then val else 0 end) gid_01
       , sum(case when g_id = 'gid_02' then val else 0 end) gid_02
       , sum(case when g_id = 'gid_03' then val else 0 end) gid_03
       , sum(case when g_id = 'gid_04' then val else 0 end) gid_04
       , sum(case when g_id = 'gid_05' then val else 0 end) gid_05
       , sum(case when g_id = 'gid_06' then val else 0 end) gid_06
       , sum(case when g_id = 'gid_07' then val else 0 end) gid_07
       , sum(case when g_id = 'gid_08' then val else 0 end) gid_08
       , sum(case when g_id = 'gid_09' then val else 0 end) gid_09
       , sum(case when g_id = 'gid_10' then val else 0 end) gid_10
       , sum(case when g_id = 'gid_11' then val else 0 end) gid_11
       , sum(case when g_id = 'gid_12' then val else 0 end) gid_12
       , sum(case when g_id = 'gid_13' then val else 0 end) gid_13
       , sum(case when g_id = 'gid_14' then val else 0 end) gid_14
       , sum(case when g_id = 'gid_15' then val else 0 end) gid_15
       , sum(case when g_id = 'gid_16' then val else 0 end) gid_16
       , sum(case when g_id = 'gid_17' then val else 0 end) gid_17
       , sum(case when g_id = 'gid_18' then val else 0 end) gid_18
       , sum(case when g_id = 'gid_19' then val else 0 end) gid_19
       , sum(case when g_id = 'gid_20' then val else 0 end) gid_20
       , sum(case when g_id = 'gid_21' then val else 0 end) gid_21
       , sum(case when g_id = 'gid_22' then val else 0 end) gid_22
       , sum(case when g_id = 'gid_23' then val else 0 end) gid_23
       , sum(case when g_id = 'gid_24' then val else 0 end) gid_24
       , sum(case when g_id = 'gid_25' then val else 0 end) gid_25
       , sum(case when g_id = 'gid_26' then val else 0 end) gid_26
       , sum(case when g_id = 'gid_27' then val else 0 end) gid_27
       , sum(case when g_id = 'gid_28' then val else 0 end) gid_28
       , sum(case when g_id = 'gid_29' then val else 0 end) gid_29
       , sum(case when g_id = 'gid_30' then val else 0 end) gid_30
       , sum(case when g_id = 'gid_31' then val else 0 end) gid_31
       , sum(case when g_id = 'gid_32' then val else 0 end) gid_32
       , sum(case when g_id = 'gid_33' then val else 0 end) gid_33
       , sum(case when g_id = 'gid_34' then val else 0 end) gid_34
       , sum(case when g_id = 'gid_35' then val else 0 end) gid_35
       , sum(case when g_id = 'gid_36' then val else 0 end) gid_36
       , sum(case when g_id = 'gid_37' then val else 0 end) gid_37
       , sum(case when g_id = 'gid_38' then val else 0 end) gid_38
       , sum(case when g_id = 'gid_39' then val else 0 end) gid_39
       , sum(case when g_id = 'gid_40' then val else 0 end) gid_40
       , sum(case when g_id = 'gid_41' then val else 0 end) gid_41
       , sum(case when g_id = 'gid_42' then val else 0 end) gid_42
       , sum(case when g_id = 'gid_43' then val else 0 end) gid_43
       , sum(case when g_id = 'gid_44' then val else 0 end) gid_44
       , sum(case when g_id = 'gid_45' then val else 0 end) gid_45
       , sum(case when g_id = 'gid_46' then val else 0 end) gid_46
       , sum(case when g_id = 'gid_47' then val else 0 end) gid_47
       , sum(case when g_id = 'gid_48' then val else 0 end) gid_48
       , sum(case when g_id = 'gid_49' then val else 0 end) gid_49
       , sum(case when g_id = 'gid_50' then val else 0 end) gid_50
from    public.row_data
group by  param
order by 1


2.2. pivot sum
select  param
        , a[01] gid_01
 , a[02] gid_02
 , a[03] gid_03
 , a[04] gid_04
 , a[05] gid_05
 , a[06] gid_06
 , a[07] gid_07
 , a[08] gid_08
 , a[09] gid_09
 , a[10] gid_10
 , a[11] gid_11
 , a[12] gid_12
 , a[13] gid_13
 , a[14] gid_14
 , a[15] gid_15
 , a[16] gid_16
 , a[17] gid_17
 , a[18] gid_18
 , a[19] gid_19
 , a[20] gid_20
 , a[21] gid_21
 , a[22] gid_22
 , a[23] gid_23
 , a[24] gid_24
 , a[25] gid_25
 , a[26] gid_26
 , a[27] gid_27
 , a[28] gid_28
 , a[29] gid_29
 , a[30] gid_30
 , a[31] gid_31
 , a[32] gid_32
 , a[33] gid_33
 , a[34] gid_34
 , a[35] gid_35
 , a[36] gid_36
 , a[37] gid_37
 , a[38] gid_38
 , a[39] gid_39
 , a[40] gid_40
 , a[41] gid_41
 , a[42] gid_42
 , a[43] gid_43
 , a[44] gid_44
 , a[45] gid_45
 , a[46] gid_46
 , a[47] gid_47
 , a[48] gid_48
 , a[49] gid_49
 , a[50] gid_50
FROM  (
  select param, pivot_sum(
     array['gid_01', 'gid_02', 'gid_03', 'gid_04', 'gid_05', 'gid_06', 'gid_07', 'gid_08', 'gid_09', 'gid_10',
           'gid_11', 'gid_12', 'gid_13', 'gid_14', 'gid_15', 'gid_16', 'gid_17', 'gid_18', 'gid_19', 'gid_20',
           'gid_21', 'gid_22', 'gid_23', 'gid_24', 'gid_25', 'gid_26', 'gid_27', 'gid_28', 'gid_29', 'gid_30',
           'gid_31', 'gid_32', 'gid_33', 'gid_34', 'gid_35', 'gid_36', 'gid_37', 'gid_38', 'gid_39', 'gid_40',
           'gid_41', 'gid_42', 'gid_43', 'gid_44', 'gid_45', 'gid_46', 'gid_47', 'gid_48', 'gid_49', 'gid_50' ]
     , g_id
     , val
     ) a
  from    public.row_data               
  group by param
 ) pvt
order by param


select  param
        , a[01] gid_01
 , a[02] gid_02
 , a[03] gid_03
 , a[04] gid_04
 , a[05] gid_05
 , a[06] gid_06
 , a[07] gid_07
 , a[08] gid_08
 , a[09] gid_09
 , a[10] gid_10
 , a[11] gid_11
 , a[12] gid_12
 , a[13] gid_13
 , a[14] gid_14
 , a[15] gid_15
 , a[16] gid_16
 , a[17] gid_17
 , a[18] gid_18
 , a[19] gid_19
 , a[20] gid_20
 , a[21] gid_21
 , a[22] gid_22
 , a[23] gid_23
 , a[24] gid_24
 , a[25] gid_25
 , a[26] gid_26
 , a[27] gid_27
 , a[28] gid_28
 , a[29] gid_29
 , a[30] gid_30
 , a[31] gid_31
 , a[32] gid_32
 , a[33] gid_33
 , a[34] gid_34
 , a[35] gid_35
 , a[36] gid_36
 , a[37] gid_37
 , a[38] gid_38
 , a[39] gid_39
 , a[40] gid_40
 , a[41] gid_41
 , a[42] gid_42
 , a[43] gid_43
 , a[44] gid_44
 , a[45] gid_45
 , a[46] gid_46
 , a[47] gid_47
 , a[48] gid_48
 , a[49] gid_49
 , a[50] gid_50
FROM  (
  select param, pivot_sum(
     (select array_agg(g_id order by g_id) from   (select g_id from   public.row_data group by g_id ) a)
     , g_id
     , val
     ) a
  from    public.row_data               
  group by param
 ) pvt
order by param

2.2 madlib transpose
DROP TABLE IF EXISTS public.row_data_pivot;
SELECT madlib.pivot('public.row_data', 'public.row_data_pivot', 'param', 'g_id', 'val');
SELECT * FROM public.row_data_pivot ORDER BY param;
select *
from   public.row_data
where  param = 'param_000001'

댓글 없음:

댓글 쓰기

Greenplum Backup & Restore

Greenplum에서는 gpbackup과 gprestore를 이용해서 대량의 데이터를 병렬로 백업/병렬로 복구를 지원하고 있습니다. Full 백업이외에도 incremental 백업을 지원하고 있습니다.  - incremental 백업시에는 반드시 사전...