GaussDB(DWS)性能调优:典型不下推语句整改案例

场景1:With-Recursive contains only values rte is not shippable

根因:递归语句的某个分支中没有FROM字句(只有 VALUES 或者类似 SELECT 1 这样的语句)

案例1:递归驱动分支没有FROM字句

原始语句

SELECT  T.RPT_ITEM_ID, --报表项ID
T.RPT_ITEM_CODE,
T.USER_GROUP_CODE AS USER_GROUP_CODE --用户组
FROM BIF.BIF_RPT_ITEM_DEF_T T,
(WITH recursive cte AS (
SELECT DISTINCT TRIM(SUBSTR('' :: text, INSTR('', ',', 1, 1) + 1, INSTR('', ',', 1, 2) - INSTR('', ',', 1, 1) - 1)) AS cte_RPT_ITEM_CODE, 
1 AS level
FROM (SELECT '') AS tb0
UNION ALL
SELECT DISTINCT TRIM(SUBSTR('' :: text, INSTR('', ',', 1, cte.level + 1) + 1, INSTR('', ',', 1, cte.level + 2) - INSTR('', ',', 1, cte.level + 1) - 1)),
cte.level + 1
FROM (SELECT '') AS tb0, cte
WHERE cte.level + 1 <= LENGTH('') - LENGTH(REPLACE('', ',', '')) - 1
)
SELECT 
DISTINCT cte_RPT_ITEM_CODE AS RPT_ITEM_CODE 
FROM cte
) T5
WHERE NVL(INSTR(T.RPT_ITEM_FREQUENCE, 'M'), 0) > 0
AND T.RPT_ITEM_CODE = NVL(T5.RPT_ITEM_CODE, T.RPT_ITEM_CODE)
AND T.RPT_ITEM_TYPE = 1 --是否是叶子报表项,1=是,0=否,基本报表项
AND T.ENABLE_FLAG = 1
AND T.VERSION = '202308' --使用快照,增加条件限制
ORDER BY T.RPT_ITEM_ID

改写语句

SELECT  T.RPT_ITEM_ID, --报表项ID
T.RPT_ITEM_CODE,
T.USER_GROUP_CODE AS USER_GROUP_CODE --用户组
FROM BIF.BIF_RPT_ITEM_DEF_T T,
(WITH recursive cte AS (
SELECT DISTINCT TRIM(SUBSTR('' :: text, INSTR('', ',', 1, 1) + 1, INSTR('', ',', 1, 2) - INSTR('', ',', 1, 1) - 1)) AS cte_RPT_ITEM_CODE, 
1 AS level
FROM generate_series(1, 1) AS tb0
UNION ALL
SELECT DISTINCT TRIM(SUBSTR('' :: text, INSTR('', ',', 1, cte.level + 1) + 1, INSTR('', ',', 1, cte.level + 2) - INSTR('', ',', 1, cte.level + 1) - 1)),
cte.level + 1
FROM (SELECT '') AS tb0, cte
WHERE cte.level + 1 <= LENGTH('') - LENGTH(REPLACE('', ',', '')) - 1
)
SELECT 
DISTINCT cte_RPT_ITEM_CODE AS RPT_ITEM_CODE 
FROM cte
) T5
WHERE NVL(INSTR(T.RPT_ITEM_FREQUENCE, 'M'), 0) > 0
AND T.RPT_ITEM_CODE = NVL(T5.RPT_ITEM_CODE, T.RPT_ITEM_CODE)
AND T.RPT_ITEM_TYPE = 1 --是否是叶子报表项,1=是,0=否,基本报表项
AND T.ENABLE_FLAG = 1
AND T.VERSION = '202308' --使用快照,增加条件限制
ORDER BY T.RPT_ITEM_ID

修改点比对

GaussDB(DWS)性能调优:典型不下推语句整改案例

案例2:递归驱动分支没有FROM字句

原始语句

SELECT A.DYNM_COMP_ID,
DECODE(B.LINE_NO, 1, '202308', A.VERSION)
FROM BIF.BIF_DYNM_COMP_SOU_TBL_V A,
(WITH recursive cte AS (
SELECT 1 AS level
UNION ALL
SELECT cte.level + 1
FROM cte
WHERE cte.level + 1 < 3
)
SELECT 
level as LINE_NO 
FROM cte
) B
WHERE EXISTS (SELECT 1
FROM BIF.BIF_RPT_ITEM_DEF_MV RPT,
BIF.BIF_PUB_SUBJECT_AREA_T SBJ,
BIF.BIF_SNAPSHORT_SUBJECT_V TYP
WHERE A.DYNM_COMP_ID = RPT.DYNM_COMP_ID
AND RPT.VERSION = 'current'
AND RPT.SUBJECT_AREA_ID = SBJ.SUBJECT_AREA_ID
AND SBJ.SUBJECT_AREA_CODE =TYP.SUBJECT_CODE
AND TYP.SUBJECT_TYPE ='TAX')
AND A.VERSION = 'current'

改写语句

SELECT A.DYNM_COMP_ID,
DECODE(B.LINE_NO, 1, '202308', A.VERSION)
FROM BIF.BIF_DYNM_COMP_SOU_TBL_V A,
(SELECT 
* 
FROM generate_series(1, 2) AS cte(LINE_NO)
) B
WHERE EXISTS (SELECT 1
FROM BIF.BIF_RPT_ITEM_DEF_MV RPT,
BIF.BIF_PUB_SUBJECT_AREA_T SBJ,
BIF.BIF_SNAPSHORT_SUBJECT_V TYP
WHERE A.DYNM_COMP_ID = RPT.DYNM_COMP_ID
AND RPT.VERSION = 'current'
AND RPT.SUBJECT_AREA_ID = SBJ.SUBJECT_AREA_ID
AND SBJ.SUBJECT_AREA_CODE =TYP.SUBJECT_CODE
AND TYP.SUBJECT_TYPE ='TAX')
AND A.VERSION = 'current'

修改点比对

GaussDB(DWS)性能调优:典型不下推语句整改案例

案例3:递归驱动分支是VALUES字句

原始语句

WITH RECURSIVE t(n) AS (
VALUES (1)
UNION ALL
SELECT 
n+1 
FROM t 
WHERE n < (SELECT MAX(LENGTH(COMP_CODE)-LENGTH(REPLACE(COMP_CODE,',','')))+1 MAX_TOKENS
FROM (SELECT DEPT_CODE,
to_char(APPLICABLE_GEO_PC_CODE) COMP_CODE
FROM SDIHR.MDM_CDM_DEPT_ACT_INFO_T_3600)
)
)
SELECT n AS LVL FROM t

改写语句

WITH RECURSIVE t(n) AS (
SELECT * FROM generate_series(1, 1)
UNION ALL
SELECT 
n+1 
FROM t 
WHERE n < (SELECT MAX(LENGTH(COMP_CODE)-LENGTH(REPLACE(COMP_CODE,',','')))+1 MAX_TOKENS
FROM (SELECT DEPT_CODE,
to_char(APPLICABLE_GEO_PC_CODE) COMP_CODE
FROM SDIHR.MDM_CDM_DEPT_ACT_INFO_T_3600)
)
)
SELECT n AS LVL FROM t

修改点比对

GaussDB(DWS)性能调优:典型不下推语句整改案例

案例4:递归驱动分支是VALUES字句

原始语句

WITH RECURSIVE t(n) AS (
VALUES (1)
UNION ALL
SELECT 
n+1 
FROM t 
WHERE n < (SELECT MAX(LENGTH(COMP_CODE)-LENGTH(REPLACE(COMP_CODE,',','')))+1 MAX_TOKENS
FROM (SELECT DEPT_CODE,
to_char(APPLICABLE_GEO_PC_CODE) COMP_CODE
FROM SDIHR.MDM_CDM_DEPT_ACT_INFO_T_3600))
)
SELECT n AS LVL FROM t

改写语句

SELECT 
* 
FROM generate_series(1, (SELECT MAX(LENGTH(COMP_CODE)-LENGTH(REPLACE(COMP_CODE,',','')))+1 MAX_TOKENS
FROM (SELECT DEPT_CODE,
to_char(APPLICABLE_GEO_PC_CODE) COMP_CODE
FROM SDIHR.MDM_CDM_DEPT_ACT_INFO_T_3600))
) AS t(lvl)

修改点比对

GaussDB(DWS)性能调优:典型不下推语句整改案例

场景2:With-Recursive contains system table is not shippable

根因:递归语句的某个分支中没有FROM字句中只用系统表或者系统视图(DUAL也被视为系统视图)

案例1:递归驱动分支是FROM DUAL查询

原始语句

WITH recursive cte AS (
SELECT 
TO_DATE(201701, 'YYYYMM') as level ,TO_DATE(20170131, 'YYYYMMDD')  LASTDAY  
FROM dual 
UNION ALL 
SELECT
ADD_MONTHS(cte.LEVEL,   1) AS PERIOD,
LAST_DAY(ADD_MONTHS(cte.LEVEL,   1)) AS LASTDAY
FROM cte WHERE cte.LEVEL <=SYSDATE 
) 
SELECT 
TO_CHAR(cte.level,'YYYYMMDD') AS PERIOD , cte.LASTDAY 
FROM cte
WHERE TO_CHAR(cte.level,'YYYYMMDD')<= TO_CHAR(SYSDATE,'YYYYMMDD')

改写语句

WITH recursive cte AS (
SELECT 
TO_DATE(201701, 'YYYYMM') as level ,TO_DATE(20170131, 'YYYYMMDD')  LASTDAY  
FROM generate_series(1, 1) 
UNION ALL 
SELECT
ADD_MONTHS(cte.LEVEL,   1) AS PERIOD,
LAST_DAY(ADD_MONTHS(cte.LEVEL,   1)) AS LASTDAY
FROM cte WHERE cte.LEVEL <=SYSDATE 
) 
SELECT 
TO_CHAR(cte.level,'YYYYMMDD') AS PERIOD , cte.LASTDAY 
FROM cte
WHERE TO_CHAR(cte.level,'YYYYMMDD')<= TO_CHAR(SYSDATE,'YYYYMMDD')

修改点对比

GaussDB(DWS)性能调优:典型不下推语句整改案例

场景3:SubPlan exec on CN can’t be shipped

根因:某个子查询语句只能在CN上执行,通常是这个子查询有不下推因素,比如有系统表、系统视图调用,或者存在不下推函数等

案例1:子查询中系统表/系统视图查询

原始语句

WITH error_log  AS NOT MATERIALIZED (
SELECT 
upper(log_column_name) AS log_column_name,
log_error_code,
s.char_length AS data_length,
s.data_type,s.nullable
FROM (SELECT 
distinct
unnest(string_to_array(bad_log_column_name,',')) AS log_column_name,
unnest(string_to_array(bad_log_error_code,',')) AS log_error_code
FROM stgltc.BAD_cfs_inv_invoice_ad_2500
) T, 
(SELECT * FROM user_tab_columns WHERE table_name=lower('dlt_cfs_inv_invoice_ad_2500')) S 
WHERE  upper(T.log_column_name)=upper(S.column_name)
) 
SELECT 
CASE WHEN upper('ACTIVITY_NAME') IN (SELECT log_column_name FROM error_log WHERE data_type IN ('varchar','char','character','nchar','character varying','varchar2','nvarchar2','clob','text') AND log_error_code='22001'/*字符超长*/) 
THEN SUBSTRB(ACTIVITY_NAME,0,(SELECT distinct DATA_LENGTH FROM error_log WHERE upper(log_column_name)=upper('ACTIVITY_NAME'))) 
ELSE ACTIVITY_NAME 
END AS ACTIVITY_NAME, 
CASE WHEN upper('ADJUSTMENT_ID') IN (SELECT log_column_name FROM error_log WHERE data_type IN ('varchar','char','character','nchar','character varying','varchar2','nvarchar2','clob','text') AND log_error_code='22001'/*字符超长*/) 
THEN SUBSTRB(ADJUSTMENT_ID,0,(SELECT distinct DATA_LENGTH FROM error_log WHERE upper(log_column_name)=upper('ADJUSTMENT_ID'))) 
ELSE ADJUSTMENT_ID 
END AS ADJUSTMENT_ID
FROM stgltc.BAD_cfs_inv_invoice_ad_2500 

改写语句

-- 识别不下推的子查询为WITH error_log字句中的
-- SELECT * FROM user_tab_columns WHERE table_name=lower('dlt_cfs_inv_invoice_ad_2500')
--
-- 因为这部分为系统表查询,无论如何都不能下推,所以此处把这部分结果转储到一个中间表中
-- 中间表创建成行存表
CREATE TEMP TABLE s WITH(orientation=row) DISTRIBUTE BY ROUNDROBIN AS 
SELECT * FROM user_tab_columns WHERE table_name=lower('dlt_cfs_inv_invoice_ad_2500')
-- 因为整个查询涉及到的表都是列存表,之后前面创建的临时表s为行存表
-- 所以此处加一个强制走向量化的hint
WITH error_log  AS NOT MATERIALIZED (
SELECT 
upper(log_column_name) AS log_column_name,
log_error_code,
s.char_length AS data_length,
s.data_type,s.nullable
FROM (SELECT 
distinct
unnest(string_to_array(bad_log_column_name,',')) AS log_column_name,
unnest(string_to_array(bad_log_error_code,',')) AS log_error_code
FROM stgltc.bad_cfs_inv_invoice_ad_2500
) T, 
pg_temp.S 
WHERE  upper(T.log_column_name)=upper(S.column_name)
) 
SELECT /*+ set global(enable_force_vector_engine on)*/
CASE WHEN upper('ACTIVITY_NAME') IN (SELECT log_column_name FROM error_log WHERE data_type IN ('varchar','char','character','nchar','character varying','varchar2','nvarchar2','clob','text') AND log_error_code='22001'/*字符超长*/) 
THEN SUBSTRB(ACTIVITY_NAME,0,(SELECT distinct DATA_LENGTH FROM error_log WHERE upper(log_column_name)=upper('ACTIVITY_NAME'))) 
ELSE ACTIVITY_NAME 
END AS ACTIVITY_NAME, 
CASE WHEN upper('ADJUSTMENT_ID') IN (SELECT log_column_name FROM error_log WHERE data_type IN ('varchar','char','character','nchar','character varying','varchar2','nvarchar2','clob','text') AND log_error_code='22001'/*字符超长*/) 
THEN SUBSTRB(ADJUSTMENT_ID,0,(SELECT distinct DATA_LENGTH FROM error_log WHERE upper(log_column_name)=upper('ADJUSTMENT_ID'))) 
ELSE ADJUSTMENT_ID 
END AS ADJUSTMENT_ID
FROM stgltc.bad_cfs_inv_invoice_ad_2500 

修改点对比

GaussDB(DWS)性能调优:典型不下推语句整改案例

场景4:Type of Record in TargetList can not be shipped

根因:输出列中存在record类型,这种类型的不下推一般是不会体现在最外层的输出列上,一般这类报错有两个场景

1.SQL书写逻辑错误,导致输出列上出现了(…)形式的输出列

2.SQL业务逻辑正确, 这种场景需要了解业务含义,把record字段强转为text类型,然后再使用record字段的地方做特殊适配

案例1:输出列书写错误,出现(…)形式的输出列

原始语句

SELECT
d.id,
coalesce(d.period, 'snull') AS period,
(d.plan_unit_code, 'snull') AS plan_unit_code,
coalesce(d.product_type_model, 'snull') AS product_type_model,
coalesce(d.revision, 'snull') AS revision,
d.start_date
FROM (SELECT *
FROM cdcscm.cdc_mp_d_forecast_t_6120 t
WHERE t.cdc_timestamp > to_date('2023-07-06 00:00:00', 'yyyy-mm-dd hh24:mi:ss') - 7 
AND t.cdc_timestamp < to_date('2023-08-08 00:00:00', 'yyyy-mm-dd hh24:mi:ss')
) t1,
sdiscm.mp_d_forecast_t_6120 d
WHERE (t1.audit_op_type = 'delete' AND t1.audit_op_option = 'before')
AND d.id = t1.id

改写语句

SELECT
d.id,
coalesce(d.period, 'snull') AS period,
coalesce(d.plan_unit_code, 'snull') AS plan_unit_code,
coalesce(d.product_type_model, 'snull') AS product_type_model,
coalesce(d.revision, 'snull') AS revision,
d.start_date
FROM (SELECT *
FROM cdcscm.cdc_mp_d_forecast_t_6120 t
WHERE t.cdc_timestamp > to_date('2023-07-06 00:00:00', 'yyyy-mm-dd hh24:mi:ss') - 7 
AND t.cdc_timestamp < to_date('2023-08-08 00:00:00', 'yyyy-mm-dd hh24:mi:ss')
) t1,
sdiscm.mp_d_forecast_t_6120 d
WHERE (t1.audit_op_type = 'delete' AND t1.audit_op_option = 'before')
AND d.id = t1.id

修改点对比

GaussDB(DWS)性能调优:典型不下推语句整改案例

备注:改写前后语句不等价,不等价的原因是因为原始SQL书写有问题,正确的写法是就是coalesce(d.plan_unit_code, ‘snull’) AS plan_unit_code

场景5:Type of Record in non-real table can not be shipped

根因:8.2.1特定版本下update语句+WITH语句场景存在下推问题

案例1:UPDATE语句不下推

  1. 在语句执行前设置参数  set enable_stream_ctescan = off,业务语句执行完再执行语句 set enable_stream_ctescan = on 重置参数
  2. 语句中UPDATE关键字后加hint,具体方式为 UPDATE /*+ set global(enable_stream_ctescan  off) */

场景6:main plan exec on CN and SubPlan exec on DN can’t be shipped

根因:对于复杂语句,主查询为不涉及用户表的查询,但是主查询相关的子查询是涉及用户表的查询语句

案例1:主查询来源表是维表DUAL

原始语句

SELECT 
1 
FROM (SELECT 1 
FROM dual 
WHERE -2 = -2
AND 202210 IN(SELECT /*+ PARALLEL(8) */ 
DISTINCT PERIOD_ID
FROM DWICBG_VI.CBGDWR_PSI_FIN_OVER_INV_V T 
WHERE T.LAST_MODIFIED_DATE > (SELECT NVL(MAX(LAST_MODIFIED_DATE), TRUNC(SYSDATE) - 1) from FIN_DWB_MSS.DWB_MSS_PSI_FIN_OVER_INV_M_F) AND T.LAST_MODIFIED_DATE <= (SELECT  TRUNC(SYSDATE) + 1) 
)
) t LIMIT 1

改写后语句

SELECT 
1 
FROM(SELECT 1 
FROM (SELECT /*+ PARALLEL(8) */
DISTINCT PERIOD_ID
FROM DWICBG_VI.CBGDWR_PSI_FIN_OVER_INV_V T 
WHERE T.LAST_MODIFIED_DATE > (SELECT NVL(MAX(LAST_MODIFIED_DATE), TRUNC(SYSDATE) - 1) from FIN_DWB_MSS.DWB_MSS_PSI_FIN_OVER_INV_M_F) AND T.LAST_MODIFIED_DATE <= (SELECT  TRUNC(SYSDATE) + 1) 
) t(PERIOD_ID) 
where PERIOD_ID = 202210
) t LIMIT 1

场景7:With-Recursive under multi-nodegroup scenario is not shippable

根因:查询语句中的表涉及多个逻辑集群。这种场景的处理方案一般是通过中间表的方式让所有表在一个逻辑集群,或者把WITH RECURSIVE语句隔离出来以避免影响这个语句的性能

案例1:通过临时表把WITH RECURSIVE语句中的非当前逻辑集群的表转储当前逻辑集群

适用场景:语句中直接包含WITH RECURSIVE语句,且WITH RECURSIVE只有少部分(1-2个)表涉及到其它逻辑集群,并且这些表本身并不大或者经过过滤条件之后结果集不大

原始语句如下。语句中目标表dwltax.dwl_tax_tom_cit_tp_account_tmp在逻辑集群”LC_DW1″中,来源表dwtaxdi.dwi_tax_cit_tp_account_r_i中涉及的表来源于逻辑集群“LC_DL1”

INSERT INTO dwltax.dwl_tax_tom_cit_tp_account_tmp(account_id,parentid) 
SELECT 
S.account_id,S.parentid 
FROM (SELECT 
DISTINCT S.ACCOUNT_ID, S.PARENTID
FROM (
WITH RECURSIVE tmp AS (
SELECT 1 AS LV,
'/'||T.PARENTID AS path,
account_ID AS account_ID,
PARENTID AS PARENTID ,
ROW_number() over() PATH_NUM,
T.DEL_FLAG
FROM dwtaxdi.dwi_tax_cit_tp_account_r_i t
WHERE T.DEL_FLAG = 'N'
AND T.ISLEAF_FLAG = 1
UNION ALL 
SELECT 1+lv AS LV,
p.path||'/'||t.PARENTID AS path,
p.account_ID,
t.PARENTID AS PARENTID,
ROW_number() over() PATH_NUM,
T.DEL_FLAG
FROM dwtaxdi.dwi_tax_cit_tp_account_r_i t
INNER JOIN tmp p ON p.PARENTID = t.account_ID
)
SELECT lv, 
'/' ||account_ID||path|| '/'AS ITEM_ID_PATH,
account_ID,
PARENTID,
PATH_NUM
FROM tmp
WHERE DEL_FLAG='N'
) S
) S

优化方案

CREATE TEMP TABLE DWI_TAX_CIT_TP_ACCOUNT_R_i -- 把来源表数据导入临时表,临时表在逻辑集群“LC_DW1”
WITH(orientation=column) 
TO GROUP "LC_DW1" 
AS SELECT * 
FROM dwtaxdi.DWI_TAX_CIT_TP_ACCOUNT_R_i;
INSERT INTO dwltax.dwl_tax_tom_cit_tp_account_tmp(account_id,parentid)  -- 来源表和目标表都在逻辑集群“LC_DW1”
SELECT 
S.account_id,S.parentid 
FROM (SELECT 
DISTINCT S.ACCOUNT_ID, S.PARENTID
FROM (
WITH RECURSIVE tmp AS (
SELECT 1 AS LV,
'/'||T.PARENTID AS path,
account_ID AS account_ID,
PARENTID AS PARENTID ,
ROW_number() over() PATH_NUM,
T.DEL_FLAG
FROM dwi_tax_cit_tp_account_r_i t
WHERE T.DEL_FLAG = 'N'
AND T.ISLEAF_FLAG = 1
UNION ALL 
SELECT 1+lv AS LV,
p.path||'/'||t.PARENTID AS path,
p.account_ID,
t.PARENTID AS PARENTID,
ROW_number() over() PATH_NUM,
T.DEL_FLAG
FROM dwi_tax_cit_tp_account_r_i t
INNER JOIN tmp p ON p.PARENTID = t.account_ID
)
SELECT lv, 
'/' ||account_ID||path|| '/'AS ITEM_ID_PATH,
account_ID,
PARENTID,
PATH_NUM
FROM tmp
WHERE DEL_FLAG='N'
) S
) S

优化后的执行计划

 id |                                                               operation                                                               |  E-rows  | E-distinct | E-memory | E-width | E-costs  
----+---------------------------------------------------------------------------------------------------------------------------------------+----------+------------+----------+---------+----------
1 | ->  Row Adapter                                                                                                                       |        1 |            |          |      64 | 92487.39 
2 |    ->  Vector Streaming (type: GATHER)                                                                                                |        1 |            |          |      64 | 92487.39 
3 |       ->  Vector Insert on dwltax.dwl_tax_tom_cit_tp_account_tmp                                                                      |       72 |            | 16MB     |      64 | 92485.11 
4 |          ->  Vector Adapter                                                                                                           |       72 |            | 1MB      |      64 | 92483.10 
5 |             ->  Streaming(type: ROUNDROBIN)                                                                                           |       72 |            | 2MB      |      64 | 92483.10 
6 |                ->  HashAggregate                                                                                                      |        1 |            | 16MB     |      64 | 92480.84 
7 |                   ->  Result                                                                                                          |        1 |            | 1MB      |       0 | 92480.81 
8 |                      ->  Streaming(type: BROADCAST)                                                                                   |        1 |            | 2MB      |       0 | 0.30     
9 |                         ->  CTE Scan on tmp                                                                                           |        1 |            | 1MB      |       0 | 0.02     
10 |                            ->  Recursive Union (11,15)                                                                                |  2027304 |            |          |      78 | 92480.51 
11 |                               ->  WindowAgg                                                                                           |    33624 |            |          |      18 | 838.02   
12 |                                  ->  Streaming(type: BROADCAST)                                                                       |  2421648 |            |          |      18 | 829.85   
13 |                                     ->  Row Adapter                                                                                   |    33624 |            |          |      18 | 534.32   
14 |                                        ->  CStore Scan on pg_temp_cn_5010_2869_708_140584651829760.dwi_tax_cit_tp_account_r_i t       |    33634 |            |          |      18 | 534.32   
15 |                               ->  WindowAgg                                                                                           |   199368 |            |          |      78 | 9107.93  
16 |                                  ->  Streaming(type: BROADCAST) stream_level:1                                                        | 14354280 |            |          |      78 | 9045.63  
17 |                                     ->  Hash Join (18,19) <<ruid:[10] ctlid:[10] (SYNC)>>                                             |   199365 |            |          |      78 | 1452.52  
18 |                                        ->  WorkTable Scan on tmp p                                                                    | 24209280 | 67248      |          |     100 | 38.64    
19 |                                        ->  Hash                                                                                       |    39888 | 554        |          |      18 | 531.55   
20 |                                           ->  Row Adapter                                                                             |    39888 |            |          |      18 | 531.55   
21 |                                              ->  CStore Scan on pg_temp_cn_5010_2869_708_140584651829760.dwi_tax_cit_tp_account_r_i t |    39873 |            |          |      18 | 531.55   
SQL Diagnostic Information    
----------------------------------
CTE is inlined
There are recursive CTEs in query

案例2:把WITH RECURSIVE语句部分的结果转储到临时表

适用场景:当WITH RECURSIVE部分结果比较小,且第一种整改方案比较大的时候,常见的有以下场景

a) WITH RECURSIVE语句涉及到较多其它逻辑集中的表,转储比较麻烦

b)当前语句没有直接调用WITH RECURSIVE语句,但是调用的视图中包含WITH RECURSIVE逻辑

原始语句

INSERT INTO dwltax.bif_rpt_item_htp_tax_t(PERIOD_ID,REPORT_ITEM_ID,CRT_CYCLE_ID,LAST_UPD_CYCLE_ID,UPD_JOB_INSTANCE_ID, DEL_FLAG, CRT_JOB_INSTANCE_ID, UCCID )
SELECT
DISTINCT T.PERIOD_ID ,158004503,-1,-1,-1,'N',-1,T.UCCID
FROM dwltax.dwl_tax_tom_taxpln_htp_tag_v T   
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_account_v  TAX_E1502044 ON T.ACCOUNT_ID =  TAX_E1502044.ACCOUNT_ID  
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_rf_tag_v  TAX_E1502045 ON T.RF_ID =  TAX_E1502045.RF_ID 
WHERE  1 = 1  
AND T.SOURCE_SYSTEM = 'HWHTPLOC01'
AND TAX_E1502044.PARENTID_ACCOUNT_CODE IN ('TARFAdjustmentsManual','TARFAdjustmentsExpenseDeferredReclass')
AND TAX_E1502045.PARENTID_RF_CODE = 'TARFTotal'
AND T.DATACATEGORY_CODE IN ('TARFDeferredNonCurrentLossNR')
AND T.PERIOD_ID=202308

分析发现如上语句的dwltax.dwl_tax_tom_cit_tp_rf_tag_v包含WITH RECURSIVE逻辑

CREATE OR REPLACE VIEW dwltax.dwl_tax_tom_cit_tp_rf_tag_v AS  SELECT d2.rf_id AS parent_rf_id, 
d2.label AS parentid_rf_code, d1.rf_id, d1.label AS rf_code, 
d1.userdefined1, d1.userdefined2, d1.userdefined3, d1.en_us, d1.en_gb, 
d2.en_us AS parent_en_us, d2.en_gb AS parent_en_gb, d2.source_system
FROM (SELECT 
DISTINCT r1.rf_id, r1.parentid
FROM (SELECT 
DISTINCT s.rf_id, s.parentid
FROM (WITH RECURSIVE tmp AS (SELECT 1 AS lv, 
'/'::text || t.parentid::text AS path, 
t.rf_id, t.parentid
FROM dwtaxdi.dwi_tax_cit_tp_rf_r_i t1
RIGHT JOIN dwtaxdi.dwi_tax_cit_tp_rf_r_i t ON t.rf_id = t1.rf_id AND t.parentid = t1.parentid AND t1.del_flag::text = 'N'::text
WHERE t.del_flag::text = 'N'::text AND t.isleaf_flag = 1::numeric AND 1 = 1 AND 1 = 1 AND 1 = 1
UNION ALL
SELECT 1 + p.lv AS lv, 
(p.path || '/'::text) || t.parentid::text AS path, 
p.rf_id, t.parentid
FROM tmp p, dwtaxdi.dwi_tax_cit_tp_rf_r_i t1
RIGHT JOIN dwtaxdi.dwi_tax_cit_tp_rf_r_i t ON t.rf_id = t1.rf_id AND t.parentid = t1.parentid AND t1.del_flag::text = 'N'::text
WHERE p.parentid = t.rf_id AND t.del_flag::text = 'N'::text AND 1 = 1 AND 1 = 1 AND 1 = 1
)
SELECT (('/'::text || tmp.rf_id::text) || tmp.path) || '/'::text AS item_id_path, 
tmp.rf_id, tmp.parentid
FROM tmp
) s
UNION ALL
SELECT r.rf_id, r.rf_id AS parentid
FROM dwtaxdi.dwi_tax_cit_tp_rf_r_i r
WHERE r.isleaf_flag = 1::numeric AND r.del_flag::text = 'N'::text) r1
) r, dwtaxdi.dwi_tax_cit_tp_rf_i d1, dwtaxdi.dwi_tax_cit_tp_rf_i d2
WHERE r.rf_id = d1.rf_id AND r.parentid = d2.rf_id;

视图dwltax.dwl_tax_tom_cit_tp_rf_tag_v的结果集只有1w行左右,因此我们尝试把事务dwltax.dwl_tax_tom_cit_tp_rf_tag_v的结果集转储到中间表,从而保证主查询可以下推

优化后语句

-- 8.2.1及以上版本
CREATE TEMP TABLE dwl_tax_tom_cit_tp_rf_tag_v WITH(orientation=column, enable_hstore= true) 
AS
SELECT * FROM dwltax.dwl_tax_tom_cit_tp_rf_tag_v
-- 8.2.0及以下版本
CREATE TEMP TABLE dwl_tax_tom_cit_tp_rf_tag_v WITH(orientation=row) 
AS
SELECT * FROM dwltax.dwl_tax_tom_cit_tp_rf_tag_v
INSERT /*+ set global(enable_force_vector_engine on)*/INTO  
-- 当dwl_tax_tom_cit_tp_rf_tag_v属性为orientation=column, enable_hstore= true时可以不加hint
dwltax.bif_rpt_item_htp_tax_t(PERIOD_ID,REPORT_ITEM_ID,CRT_CYCLE_ID,LAST_UPD_CYCLE_ID,UPD_JOB_INSTANCE_ID, DEL_FLAG, CRT_JOB_INSTANCE_ID, UCCID )
SELECT
DISTINCT T.PERIOD_ID ,158004503,-1,-1,-1,'N',-1,T.UCCID
FROM dwltax.dwl_tax_tom_taxpln_htp_tag_v T   
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_account_v  TAX_E1502044 ON T.ACCOUNT_ID =  TAX_E1502044.ACCOUNT_ID  
INNER JOIN  pg_temp.dwl_tax_tom_cit_tp_rf_tag_v  TAX_E1502045 ON T.RF_ID =  TAX_E1502045.RF_ID 
WHERE  1 = 1  
AND T.SOURCE_SYSTEM = 'HWHTPLOC01'
AND TAX_E1502044.PARENTID_ACCOUNT_CODE IN ('TARFAdjustmentsManual','TARFAdjustmentsExpenseDeferredReclass')
AND TAX_E1502045.PARENTID_RF_CODE = 'TARFTotal'
AND T.DATACATEGORY_CODE IN ('TARFDeferredNonCurrentLossNR')
AND T.PERIOD_ID=202308

场景8:With-Recursive contains conflict distribution in none-recursive(xxx) recursive(xxx)

不下推根因:非递归部分语句(UNION ALL前面部分)的执行计划为replicate计划,递归部分语句(UNION ALL后面部分)的执行计划为Hash计划,计划存在冲突

处理方案:把非递归部分语句转储为临时表,临时表的分布形式和计划的分布式不一致即可

案例1:非递归部分语句的执行计划为replicate计划

原始语句:

WITH RECURSIVE CTE AS (
SELECT
AOH.ORG_CODE,1 AS LEVEL,
1 FLAG,
LAST_UPDATE_DATE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 AOH 
WHERE AOH.ORG_CODE IN (SELECT V.ORG_CODE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 V
WHERE V.ORG_CODE IS NOT NULL
AND AOH.ORG_CODE = V.ORG_CODE
)
UNION ALL
SELECT
AOH.ORG_CODE,T.LEVEL + 1 AS LEVEL,
1 FLAG,
AOH.LAST_UPDATE_DATE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 AOH 
INNER JOIN CTE T ON T.ORG_CODE = AOH.PARENT_ORG_CODE
)
SELECT
ORG_CODE,
MAX(LEVEL) -1 AS AOH_LEVEL,
1 FLAG,
LAST_UPDATE_DATE AS LAST_UPDATE_DATE1
FROM CTE
GROUP BY ORG_CODE, LAST_UPDATE_DATE

SQL自诊断信息

GaussDB(DWS)性能调优:典型不下推语句整改案例

优化后语句

CREATE TEMP TABLE t WITH(orientation=column) DISTRIBUTE BY ROUNDROBIN
AS
SELECT
AOH.ORG_CODE,1 AS LEVEL,
1 FLAG,
LAST_UPDATE_DATE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 AOH 
WHERE AOH.ORG_CODE IN (SELECT V.ORG_CODE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 V
WHERE V.ORG_CODE IS NOT NULL
AND AOH.ORG_CODE = V.ORG_CODE
);
WITH RECURSIVE CTE AS (
SELECT * FROM t
UNION ALL
SELECT
AOH.ORG_CODE,T.LEVEL + 1 AS LEVEL,
1 FLAG,
AOH.LAST_UPDATE_DATE
FROM SDIHR.HW_HR_ACCOUNT_ORG_HY_V_1100 AOH 
INNER JOIN CTE T ON T.ORG_CODE = AOH.PARENT_ORG_CODE
)
SELECT
ORG_CODE,
MAX(LEVEL) -1 AS AOH_LEVEL,
1 FLAG,
LAST_UPDATE_DATE AS LAST_UPDATE_DATE1
FROM CTE
GROUP BY ORG_CODE, LAST_UPDATE_DATE

场景9:Table in TargetList can not be shipped

不下推根因:表名或者表的别名出现在targetlistlist(输出列SELECT 和 FROM之间)里面

处理方案:改写SQL,消除对表名或者表的别名的直接引用

案例1:如场景描述,表test的别名t,出现在targetlistlist中

postgres=#  EXPLAIN VERBOSE SELECT t, 1 FROM test t;
QUERY PLAN
----------------------------------------------------------------------------------------------------
id |                     operation                     | E-rows | E-distinct | E-width | E-costs
----+---------------------------------------------------+--------+------------+---------+---------
1 | ->  Data Node Scan on test "_REMOTE_TABLE_QUERY_" |     20 |            |      80 | 0.00
SQL Diagnostic Information
-----------------------------------------------
SQL is not plan-shipping
reason: Table in TargetList can not be shipped
Targetlist Information (identified by plan id)
-----------------------------------------------------------------------------------
1 --Data Node Scan on test "_REMOTE_TABLE_QUERY_"
Output: t.*, (1)
Node/s: All datanodes (group_version1, bucket:16384)
Remote query: SELECT t.*::public.test, 1 FROM ONLY public.test t WHERE true

规避方案:强转为text类型,或者优化SQL语句,输出列上去除表名或者表的别名

postgres=#  EXPLAIN VERBOSE SELECT t::text, 1 FROM test t;
QUERY PLAN
----------------------------------------------------------------------------------------------
id |            operation             | E-rows | E-distinct | E-memory | E-width | E-costs
----+----------------------------------+--------+------------+----------+---------+---------
1 | ->  Streaming (type: GATHER)     |     20 |            |          |      80 | 16.15
2 |    ->  Seq Scan on public.test t |     20 |            | 1MB      |      80 | 10.15
Targetlist Information (identified by plan id)
------------------------------------------------------------
1 --Streaming (type: GATHER)
Output: ((t.*)::text), 1
Node/s: All datanodes (group_version1, bucket:16384)
2 --Seq Scan on public.test t
Output: (t.*)::text, 1

场景10:”Row () Expression” can not be shipped

不下推根因:语句中有row表达式,一般表现为在targetlistlist(输出列SELECT 和 FROM之间)上存在括号表达式”(……)”

处理方案:改写SQL,消除row表达式

案例1:如场景描述,row表达式(k4.businessID, arriveDate)出现在targetlistlist中

postgres=# EXPLAIN VERBOSE
postgres-# SELECT
postgres-#     date_format(arriveDate, '%Y-%m-%d') AS "日期",
postgres-#     sales,
postgres-#     COUNT(DISTINCT (k4.businessID, arriveDate)/*row表达式*/)
postgres-# FROM (
postgres(#     SELECT k1.businessID, k2.arriveDate, k1.sales
postgres(#     FROM potential_customers k1
postgres(#     LEFT JOIN flow_reception k2
postgres(#         ON k1.businessID = k2.businessID
postgres(#     WHERE k2.arriveDate BETWEEN '2024-08-31' AND '2024-09-02'
postgres(# ) k4
postgres-# GROUP BY arriveDate, sales;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
id |                          operation                           | E-rows | E-distinct | E-memory | E-width | E-costs
----+--------------------------------------------------------------+--------+------------+----------+---------+---------
1 | ->  GroupAggregate                                           |      2 |            |          |     104 | 20.63
2 |    ->  Sort                                                  |      2 |            |          |      96 | 20.56
3 |       ->  Subquery Scan on k4                                |      2 |            |          |      96 | 20.54
4 |          ->  Streaming (type: SUBQUERY GATHER)               |      2 |            |          |      96 | 26.53
5 |             ->  Nested Loop (6,8)                            |      2 |            | 1MB      |      96 | 20.53
6 |                ->  Streaming(type: BROADCAST)                |      2 |            | 2MB      |      64 | 10.31
7 |                   ->  Seq Scan on public.flow_reception k2   |      1 |            | 1MB      |      64 | 10.15
8 |                ->  Seq Scan on public.potential_customers k1 |     20 |            | 1MB      |      64 | 10.10
SQL Diagnostic Information
-----------------------------------------------
Partial SQL is not plan-shipping
reason: "Row () Expression" can not be shipped
Predicate Information (identified by plan id)
-------------------------------------------------------------------------------------------------
5 --Nested Loop (6,8)
Join Filter: (k1.businessid = k2.businessid)
7 --Seq Scan on public.flow_reception k2
Filter: ((k2.arrivedate >= '2024-08-31'::text) AND (k2.arrivedate <= '2024-09-02'::text))

规避方案:row表达式不直接输出,因此改写row表达式为字符串拼接

postgres=# EXPLAIN VERBOSE
postgres-# SELECT
postgres-#     date_format(arriveDate, '%Y-%m-%d') AS "日期",
postgres-#     sales,
postgres-#     COUNT(DISTINCT k4.businessID ||'-' ||  arriveDate/*消除row表达式*/)
postgres-# FROM (
postgres(#     SELECT k1.businessID, k2.arriveDate, k1.sales
postgres(#     FROM potential_customers k1
postgres(#     LEFT JOIN flow_reception k2
postgres(#         ON k1.businessID = k2.businessID
postgres(#     WHERE k2.arriveDate BETWEEN '2024-08-31' AND '2024-09-02'
postgres(# ) k4
postgres-# GROUP BY arriveDate, sales;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
id |                          operation                           | E-rows | E-distinct | E-memory | E-width | E-costs
----+--------------------------------------------------------------+--------+------------+----------+---------+---------
1 | ->  Streaming (type: GATHER)                                 |      2 |            |          |     112 | 26.71
2 |    ->  HashAggregate                                         |      2 |            | 16MB     |     112 | 20.71
3 |       ->  HashAggregate                                      |      2 |            | 16MB     |     104 | 20.68
4 |          ->  Streaming(type: REDISTRIBUTE)                   |      2 |            | 2MB      |      96 | 20.64
5 |             ->  Nested Loop (6,8)                            |      2 |            | 1MB      |      96 | 20.53
6 |                ->  Streaming(type: BROADCAST)                |      2 |            | 2MB      |      64 | 10.31
7 |                   ->  Seq Scan on public.flow_reception k2   |      1 |            | 1MB      |      64 | 10.15
8 |                ->  Seq Scan on public.potential_customers k1 |     20 |            | 1MB      |      64 | 10.10
Predicate Information (identified by plan id)
-------------------------------------------------------------------------------------------------
5 --Nested Loop (6,8)
Join Filter: (k1.businessid = k2.businessid)
7 --Seq Scan on public.flow_reception k2
Filter: ((k2.arrivedate >= '2024-08-31'::text) AND (k2.arrivedate <= '2024-09-02'::text))

场景11:Merge Into can’t be shipped in the sence temporarily

不下推根因:MERGE INTO语句的USING字句部分还有非列表达式,具体见案例

处理方案:改写SQL,非列表达式

案例1:如场景描述,子查询d中的WHERE条件含有AND 1 = 2

MERGE INTO dwifin.dwi_bill_pay_interface_line l
USING (SELECT to_number(ss_id || invoice_id) as ap_invoice_id,
to_number(ss_id || invoice_line_id) as credit_invoice_line_id,
line_number,
nvl(deleted_flag, 'N') AS src_sys_del_flag
FROM sdifin.apwf_pay_interface_line_t_6600 t
WHERE t.adjust_type = 'Y' AND 1 = 2) d
ON (l.ap_invoice_id = d.ap_invoice_id AND l.credit_invoice_line_id = d.credit_invoice_line_id AND l.line_number = d.line_number AND l.src_sys_del_flag = d.src_sys_del_flag AND l.ss_id = 6600
)
WHEN MATCHED THEN UPDATE SET l.del_flag = 'Y', last_upd_cycle_id = 20250301000000;

规避方案:这种一般是SQL语句书写错误,去掉条件“AND 1 = 2”即可

场景12:Type of Record in dual that is not a real table can not be shipped

不下推根因:优化器识别到的record列,且record列对应的主表不是一个实体表。最常见的情况为MERGE INTO语句的USING部分使用了FROM dual字句导致,具体见案例

处理方案:改写SQL,消除dual字句

案例1:如场景描述,子查询f为(SELECT 0 AS inv_id,0 AS ovs_pack_num,0 AS ovs_pack_id FROM DUAL)

MERGE INTO dwiscm.dwi_inv_balance_onhand t USING (
SELECT
f.inv_id,
d.ovs_pack_id
FROM (SELECT 0 AS inv_id,0 AS ovs_pack_num,0 AS ovs_pack_id FROM DUAL) f,
dwicommon.dwi_inv_oversea_pack d
WHERE f.ovs_pack_num = d.ovs_pack_num
) S ON (t.inv_id = s.inv_id)
WHEN MATCHED THEN UPDATE
SET t.ovs_pack_id = s.ovs_pack_id, t.last_upd_cycle_id = 20250301000000, t.dw_last_update_date = sysdate;

计划如下

                                   QUERY PLAN 
-------------------------------------------------------------------------------------------------------------------------------
id |                                  operation                                   | E-rows | E-distinct | E-width | E-costs
----+------------------------------------------------------------------------------+--------+------------+---------+---------
1 | ->  Merge on dwiscm.dwi_inv_balance_onhand t                                 |      1 |            |    3404 | 0.05
2 |    ->  Nested Loop (3,6)                                                     |      1 |            |    3404 | 0.05
3 |       ->  Nested Loop (4,5)                                                  |      1 |            |    3372 | 0.03
4 |          ->  Data Node Scan on dwi_inv_balance_onhand "_REMOTE_TABLE_QUERY_" |      1 |            |    3372 | 0.00
5 |          ->  Result                                                          |      1 |            |       0 | 0.01
6 |       ->  Data Node Scan on dwi_inv_oversea_pack "_REMOTE_TABLE_QUERY_"      |      1 |            |      32 | 0.00
SQL Diagnostic Information
---------------------------------------------------------------------------
SQL is not plan-shipping
reason: Type of Record in dual that is not a real table can not be shipped

SQL改写如下

MERGE INTO dwiscm.dwi_inv_balance_onhand t USING (
SELECT
0 AS inv_id,
d.ovs_pack_id
FROM dwicommon.dwi_inv_oversea_pack d
WHERE 0 = d.ovs_pack_num
) S ON (t.inv_id = s.inv_id)
WHEN MATCHED THEN UPDATE
SET t.ovs_pack_id = s.ovs_pack_id, t.last_upd_cycle_id = 20250301000000, t.dw_last_update_date = sysdate;

改写后SQL语句执行计划如下

                                                               QUERY PLAN                               
--------------------------------------------------------------------------------------------------------------------------------------
id |                               operation                               | E-rows | E-distinct |   E-memory   | E-width | E-costs
----+-----------------------------------------------------------------------+--------+------------+--------------+---------+---------
1 | ->  Row Adapter                                                       |      1 |            |              |    3414 | 25.78
2 |    ->  Vector Streaming (type: GATHER)                                |      1 |            |              |    3414 | 25.78
3 |       ->  Vector Merge on dwiscm.dwi_inv_balance_onhand t             |      2 |            | 36MB(3035MB) |    3414 | 23.28
4 |          ->  Vector Nest Loop (5,6)                                   |      2 |            | 1MB          |    3414 | 23.28
5 |             ->  CStore Scan on dwiscm.dwi_inv_balance_onhand t        |      1 |            | 1MB          |    3372 | 10.04
6 |             ->  Vector Materialize                                    |      2 |            | 16MB         |      42 | 13.24
7 |                ->  Vector Streaming(type: BROADCAST)                  |      2 |            | 2MB          |      42 | 13.24
8 |                   ->  CStore Scan on dwicommon.dwi_inv_oversea_pack d |      1 |            | 1MB          |      42 | 13.08
Predicate Information (identified by plan id)
----------------------------------------------------------
5 --CStore Scan on dwiscm.dwi_inv_balance_onhand t
Filter: (t.inv_id = 0::numeric)
Pushdown Predicate Filter: (t.inv_id = 0::numeric)
8 --CStore Scan on dwicommon.dwi_inv_oversea_pack d
Filter: (0 = (d.ovs_pack_num)::bigint)

原创文章,作者:奋斗,如若转载,请注明出处:https://blog.ytso.com/tech/bigdata/317345.html

(0)
上一篇 12小时前
下一篇 11小时前

相关推荐

发表回复

登录后才能评论