7.
Select 쿼리
1)
Data 연산 및 Data Type 변환
-
Greenplum의 날짜 연산은 interval 을 사용.
-
Ex)
Date/Timestamp + interval
SELECT reg_date,
date_trunc('day', reg_date),
date_trunc('month', reg_date),
reg_Date + '1 day'::interval,
reg_date + '1 month'::interval
FROM gp_master
|
n
숫자타입의 문자를 숫자 타입으로 변환
-
Data
형변환을 위해서는 컬럼::Type 으로 변환.
-
Data
형변환을 위해서는 cast 함수를 사용 가능
SELECT age,
trim(to_char(age, '99999')),
cast(age AS VARCHAR(10)),
age::bpchar
FROM gp_master
|
-- 권고
SELECT gpid,
gpname,
to_char(reg_date, 'yyyymmdd')
FROM gp_master
WHERE reg_date
<= to_Date('20100101', 'yyyymmdd')
;
-- 비권고 아래 쿼리 지양
SELECT
gpid,
gpname,
to_char(reg_date, 'yyyymmdd')
FROM gp_master
WHERE to_char(reg_date, ‘yyyymmdd’) <= '20100101'
;
|
1. Date <-> Char변환
- to_date(‘20100101’,
‘yyyymmdd’)
- to_char(reg_dt, ‘yyyymmdd’)
2. 상수를 직접 대입하여 데이터 조회
Ex 1)
WHERE reg_date <= to_Date('20100101', 'yyyymmdd');
Ex 2) WHERE reg_date <= '20100101'::timestamp;
Ex 3) WHERE reg_date <= '20100101'::date;
Ex 4) WHERE reg_date <= cast('20100101' as date) ;
|
2)
쿼리 권고 사항
n
Distinct 자제
-
Distinct는 CPU 작업을 사용하는 반면에 Group by는
IO분산 작업하여 쿼리 성능 향상.
변경 전
|
변경 후
|
select count(distinct MBR_ID)
from MBR_INFO
|
select count(*)
from (
select MBR_ID
from MBR_INFO
group by MBR_ID
)
b
|
n
컬럼 가공 금지
-
조인 컬럼에 대해서 컬럼 가공을 자제하여야 함.
변경 전
|
변경 후
|
SELECT
A.MBRID, B.SUM_DT
FROM
MBR_INFO A,
TB_SUM_DD B
WHERE a.MBRID||a.SUMDT
= b.MBRID||b.SUMDT
|
SELECT A.MBRID,
B.SUM_DT
FROM MBR_INFO A,
TB_SUM_DD B
WHERE a.MBRID
= b.MBRID
AND a.SUMDT = b.SUMDT
|
SELECT A.MBRID, B.SUM_DT
FROM MBR_INFO A
JOIN TB_SUM_DD B
ON a.MBRID = b.MBRID.
AND a.SUMDT = b.SUMDT
|
n
파티션 Scan
-
파티션 쿼리를 이용하기 위해서는 파티션 키에 like 절 보다는 between 또는 < > 의 부등호를 이용하여 함
-
파키션 컬럼을 가공하지 말아야 함.
변경 전
|
변경 후
|
SELECT
A.MBRID, B.SUM_DT
FROM
MBR_INFO A,
TB_SUM_DD B
WHERE a.MBRID||a.SUMDT
= b.MBRID||b.SUMDT
|
SELECT A.MBRID,
B.SUM_DT
FROM MBR_INFO A,
TB_SUM_DD B
WHERE a.MBRID
= b.MBRID
AND a.SUMDT = b.SUMDT
|
SELECT A.MBRID, B.SUM_DT
FROM MBR_INFO A
JOIN TB_SUM_DD B
ON a.MBRID = b.MBRID.
AND a.SUMDT = b.SUMDT
|
-
NOT
IN, IN, Existis, Not Exists 보다는 Join 을 권고
-
IN,
NOT IN, Exists, Not Exists 는 CPU 작업을 하는데 반하여,
Join 으로 변경할 경우 IO 작업으로 실행 됨
-
Greenplum의 최적의 성능을 내기 위해서는 CPU작업보다는 IO작업으로
분산하여 야 함.
변경 전
|
변경 후
|
SELECT *
FROM TB_A
WHERE col NOT IN (SELECT col FROM TB_B)
|
SELECT *
FROM TB_A A
LEFT OUTER JOIN TB_B B
ON A.col = B.col
WHERE B.col IS NULL
|
SELECT *
FROM TB_A
WHERE col IN (SELECT
col FROM TB_B)
|
SELECT *
FROM TB_A A
JOIN ( SELECT COL
FROM TB_B
GROUP BY COL ) B
ON A.col = B.col
|
SELECT
SUMDT,
sum(balance)/100000000
FROM
TB_BILL_SUM_DD
WHERE
SUMDT='200912'
AND
SEQNO NOT IN
( SELECT SEQNO
FROM TB_BILL_CDR
WHERE SUMDT='200912'
)
|
SELECT
SUMDT,
sum(balance)/100000000
FROM
TB_BILL_SUM_DD a
LEFT OUTER JOIN (
SELECT SEQNO
FROM TB_BILL_CDR
WHERE SUMDT='200912'
) b
on a.SEQNO= b. SEQNO
WHERE
SUMDT='200912'
AND
b. SEQNO is null
GROUP BY SUMDT
|
n
Correlated Sub-queries
-
Correlated
sub 쿼리를 Left outer join 으로 쿼리 튜닝
튜닝 전
|
튜닝 후
|
SELECT *
FROM facts.transaction t
WHERE NOT EXISTS
(SELECT 1
FROM dimensions.customer c
WHERE c.customerid = t.customerid);
|
SELECT t.*
FROM facts.transaction t
LEFT
OUTER JOIN dimensions.customer c
ON c.customerid =
t.customerid
WHERE c.customerid IS NULL;
|
3)
주요함수 및 쿼리 변환 예제(Oracle
=> Greenplum)
Oracle
|
Greenplum
| |
String 함수
|
||, Substr,
trim, rtrim, ltrim, lpad, rpad, upper, low, Replace
|
좌동
|
집계 함수
|
Count, avg, max, min, corr, stddev, bit_and, bit_or …
|
좌동
|
변환 함수
|
to_date, to_char, to_number,
nvl, decode, Case when 절
|
좌동
|
OLAP 함수
|
cube, rollup, GROUPING SETS
|
좌동
|
Window 함수
|
row_number() over(ORDER BY…))
RANK() OVER (PARTITION BY .. ORDER BY …)
|
좌동
|
집합 함수
|
Union, union all, intersect
|
좌동
|
minus
|
except
| |
시스템 함수
(Oracle SQL)
|
Sysdate
|
Now()
|
select*from employees
where rownum <= 10
order by name;
|
select*from employees
order by name
limit 10;
| |
Sequence
(Oracle SQL)
|
select sequence_name.nextval from dual;
|
select nextval(‘sequence_name’);
|
Left outer join
(Oracle SQL)
|
Select a.*, b.*
From tb1 a, tb2 b
Where a.col = b.col(+)
|
Select a.*, b.*
From tb1 a
Left outer join tb2 b
On a.col = b.col
|
날짜변환
예제
|
to_char(sysdate -1, ‘yyyymmdd’)
|
to_char(current_date -1, ‘yyyymmdd’)
|
to_date(‘20100101’, ‘yyyymmdd’)
|
To_date(‘20100101’, ‘yyyymmdd’)
‘20100101’::date
| |
다음달 1일 추출
|
to_char(date_trunc('month', ‘20100301’::date)
+ interval '1 month', 'yyyymmdd')
| |
45일전의 Char type의 날짜 ‘YYYYMMDD’ 유형
|
to_char(date_trunc('day', now())::date -
45, 'yyyymmdd');
| |
120분 전의 시간 ‘hh24mi’ 유형
|
substr(to_char(now() + '-120 minutes', 'hh24mi'), 1, 3)||'0'
|
4)
주요함수 및 쿼리 변환 예제(Sybase
=> Greenplum)
Sybase
|
Greenplum
|
convert(char(8),getdate(),112)
|
to_char(current_date,'YYYYMMDD')
|
dateformat( today(*) –3,'yyyymmdd'),
dateformat (today(*) – 3,'mmdd')
|
to_char(current_date-3, 'yyyymmdd'),
to_char(current_date-3,'mmdd')
|
convert(char(6), dateadd(month-1,
getdate()) ,112)
|
to_char(current_date + interval ‘-1
month','YYYYMM')
|
Left(col, 2)
|
substr(col,1, 2),
|
Right(col ,3)
|
substr(col,length(col)-2)
|
datediff( mm, convert(date, trim(dt) ),
convert(date, trim(yyyymm)||'01' ) )
|
datediff( ‘mm’, trim(dt), trim(yyyymm)||'01')
|
ISNULL(COLUMN,' ')
ISNULL(COLUMN,0)
|
COALESCE(COLUMN,' ')
COALESCE(COLUMN,0)
|
IFNULL(A,'NULL','NOT NULL')
|
SELECT CASE WHEN A IS NULL THEN 'NULL'
ELSE
'NOT NULL' END
|
dow(date-expression)
SUNDAY : 1, MONDAY : 2…. SATURDAY: 7
|
to_char(date, 'D')
SUNDAY : 1, MONDAY : 2…. SATURDAY: 7
|
SELECT Top 10 *
FROM tb_test
|
SELECT *
FROM tb_test
LIMIT 10
|
SELECT
C.COL1,
D.COL2
FROM TB_C C,
TB_D D,
TB_E E
WHERE C. ID = '000388097'
AND D.DT = '20081201'
AND D.AMT > 0
AND C.SEQ = D.SEQ /* inner join */
AND C.SEQ *= E.SEQ
/left outer join */
|
SELECT
C.COL1,
D.COL2
FROM TB_C C
JOIN TB_D D
ON C.SEQ = D.SEQ
LEFT OUTER JOIN TB_E E
ON C. SEQ = E. SEQ
WHERE C.ID = '000388097'
AND D.DT = '20081201'
AND D.AMT > 0
|
5)
분석 함수 및 Window Function
n
ROLLUP
SELECT pn, vn, sum(prc*qty)
FROM sale
GROUP BY ROLLUP(pn, vn)
ORDER BY 1,2,3;
|
n
GROUPING
SETS
SELECT pn, vn,
sum(prc*qty)
FROM sale
GROUP BY GROUPING SETS
( (pn, vn), (pn), () )
ORDER BY 1,2,3;
|
n
CUBE
SELECT pn, vn,
sum(prc*qty)
FROM sale
GROUP BY CUBE(pn, vn)
ORDER BY 1,2,3;
|
n
GROUP_ID
SELECT a, b, c,
sum(p*q), group_id()
FROM sales
GROUP BY ROLLUP(a,b),
CUBE(b,c)
HAVING group_id()<1
ORDER BY a,b,c;
|
n
Row_number() over()
SELECT * ,
row_number()
OVER()
FROM sale
ORDER BY cn;
|
row_number
| cn | vn | pn | dt | qty | prc
------------+----+----+-----+------------+------+------
1
| 1 | 10 | 200 | 1401-03-01 |
1 | 0
2
| 1 | 30 | 300 | 1401-05-02 |
1 | 0
3
| 1 | 50 | 400 | 1401-06-01 |
1 | 0
4
| 1 | 30 | 500 | 1401-06-01 |
12 | 5
5
| 1 | 20 | 100 | 1401-05-01 |
1 | 0
6
| 2 | 50 | 400 | 1401-06-01 |
1 | 0
7
| 2 | 40 | 100 | 1401-01-01 |
1100 | 2400
8
| 3 | 40 | 200 | 1401-04-01 |
1 | 0
(8 rows)
|
SELECT * ,
row_number()
OVER(PARTITION BY cn)
FROM sale
ORDER BY cn;
|
row_number
| cn | vn | pn | dt | qty | prc
------------+----+----+-----+------------+------+------
1
| 1 | 10 | 200 | 1401-03-01 |
1 | 0
2
| 1 | 30 | 300 | 1401-05-02 |
1 | 0
3
| 1 | 50 | 400 | 1401-06-01 |
1 | 0
4
| 1 | 30 | 500 | 1401-06-01 |
12 | 5
5
| 1 | 20 | 100 | 1401-05-01 |
1 | 0
1
| 2 | 50 | 400 | 1401-06-01 |
1 | 0
2
| 2 | 40 | 100 | 1401-01-01 |
1100 | 2400
1
| 3 | 40 | 200 | 1401-04-01 |
1 | 0
(8 rows)
|
n
Row_number() over(ORDER
BY…))
SELECT vn, sum(prc*qty)
FROM sale
GROUP BY vn
ORDER BY 2 DESC;
|
vn | sum
----+---------
40 |
2640002
30 | 180
50 | 0
20 | 0
10 | 0
(5 rows)
|
SELECT vn, sum(prc*qty), rank()
OVER (ORDER BY sum(prc*qty) DESC)
FROM sale
GROUP BY vn
ORDER BY 2 DESC;
|
vn |
sum | rank
----+---------+------
40 |
2640002 | 1
30 |
180 | 2
50 |
0 | 3
20 |
0 | 3
10 |
0 | 3
(5 rows)
|
n
RANK() OVER
SELECT to_char(log_date, 'yyyy') yr
,
gpid
,
RANK() OVER (PARTITION BY to_char(log_date, 'yyyy') ORDER BY sum(buying_amt)
desc) as rnk
FROM
gp_buying_log
GROUP BY to_char(log_date, 'yyyy'), gpid
ORDER BY yr, rnk
|
1. 인라인 뷰로 전환 후 분석함수 적용
2. 기타 분석함수 사용법은 모두 동일
-
[COUNT | MAX | MIN | RANK() etc ] OVER(PARTITION … )
|
감사합니다
답글삭제