背景:
因为之前处理了一些关于not exsts导致的性能sql,但是没时间整理,最近打算抽时间分享一下。
验证案例:
sql文本如下,因为篇幅有限,而且不太需要,所以我就不放建表语句及数据量的信息了
select count(1)
from tttt.mmmmm_sssssale t
where t.sssss not in ('e111', 'ddddda')
and t.stats = '1'
and t.parean is null
and t.city = 2208
AND (t.cusystatus = 'FFFFGGGGG')
AND (T.ORGGGGGGNEL is null or T.ORGGGGGGNEL != 'infonow')
AND NOT EXISTS (SELECT 1
FROM tttt.TTTT_OWN_C TOW
WHERE T.PID = TOW.OID
AND TOW.CT_ID = T.city
AND TOW.OODDD_sTS IN
(SELECT DDDC
FROM tttt.CTM_GM
WHERE GPID = 'OtherThing'
AND stats = '1')) and to_char(createdate,'yyyymmdd') between 20150101 and 202301211;
该sql我们看下执行计划和执行时间
+----------+
| COUNT(1) |
+----------+
| 29487 |
+----------+
1 row in set (43.77 sec)
*************************** 1. row ***************************
Query Plan: ===========================================================================================
|ID|OPERATOR |NAME |EST. ROWS|COST |
-------------------------------------------------------------------------------------------
|0 |SCALAR GROUP BY | |1 |165892|
|1 | NESTED-LOOP ANTI JOIN | |858 |165860|
|2 | TABLE SCAN |T |1329 |40619 |
|3 | PX COORDINATOR | |1 |94 |
|4 | EXCHANGE OUT DISTR |:EX10001 |1 |94 |
|5 | SUBPLAN SCAN |VIEW2 |1 |94 |
|6 | NESTED-LOOP JOIN | |1 |94 |
|7 | EXCHANGE IN DISTR | |1 |92 |
|8 | EXCHANGE OUT DISTR (BC2HOST)|:EX10000 |1 |92 |
|9 | TABLE SCAN |TOW(IDX_TTTT_OWN_C_ORDERID) |1 |92 |
|10| TABLE SCAN |SD_CTM_GM(PK_SD_CTM_GM) |1 |32 |
===========================================================================================
Outputs & filters:
-------------------------------------
0 - output([T_FUN_COUNT(*)(0x7f4fe8fdbcb0)]), filter(nil),
group(nil), agg_func([T_FUN_COUNT(*)(0x7f4fe8fdbcb0)])
1 - output([remove_const(1)(0x7f4af5473fc0)]), filter(nil),
conds(nil), nl_params_([T.PID(0x7f4fe8f81610)]), batch_join=false
2 - output([T.PID(0x7f4fe8f81610)]), filter([T.city(0x7f4fe8f34b00) = 2208(0x7f4fe8f343e0)], [cast(cast(TO_CHAR(T.CREATEDATE(0x7f4fe8fd6170), ?)(0x7f4fe8fd2470), VARCHAR2(256 BYTE))(0x7f4fe8fd7100), NUMBER(-1, -85))(0x7f4fe8fd7a90) >= 20150101(0x7f4fe8fd4c10)], [cast(cast(TO_CHAR(T.CREATEDATE(0x7f4fe8fd6170), ?)(0x7f4fe8fd3ad0), VARCHAR2(256 BYTE))(0x7f4fe8fd93a0), NUMBER(-1, -85))(0x7f4fe8fd9d30) <= 202301211(0x7f4fe8fd52f0)], [(T_OP_IS, T.ORGGGGGGNEL(0x7f4fe8f383b0), NULL, 0)(0x7f4fe8f36ca0) OR T.ORGGGGGGNEL(0x7f4fe8f383b0) != ?(0x7f4fe8f37bf0)(0x7f4fe8f36020)], [(T_OP_NOT_IN, T.sssss(0x7f4fe8f31520), (?, ?)(0x7f4fe8f30860))(0x7f4fe8f2fe70)], [(T_OP_IS, T.parean(0x7f4fe8f33a00), NULL, 0)(0x7f4fe8f33030)], [T.stats(0x7f4fe8f32680) = ?(0x7f4fe8f31f60)], [T.cusystatus(0x7f4fe8f35c10) = ?(0x7f4fe8f354f0)]),
access([T.sssss(0x7f4fe8f31520)], [T.stats(0x7f4fe8f32680)], [T.parean(0x7f4fe8f33a00)], [T.city(0x7f4fe8f34b00)], [T.cusystatus(0x7f4fe8f35c10)], [T.ORGGGGGGNEL(0x7f4fe8f383b0)], [T.PID(0x7f4fe8f81610)], [T.CREATEDATE(0x7f4fe8fd6170)]), partitions(p0),
is_index_back=false, filter_before_indexback[false,false,false,false,false,false,false,false],
range_key([T.__pk_increment(0x7f4fe903ae10)]), range(MIN ; MAX)always true
3 - output([remove_const(1)(0x7f4af5474810)]), filter(nil)
4 - output([remove_const(1)(0x7f4af5475060)]), filter(nil), is_single, dop=1
5 - output([remove_const(1)(0x7f4af54758b0)]), filter(nil),
access([VIEW2.TOW.OID(0x7f4fe8fef420)])
6 - output([TOW.OID(0x7f4af53c1210)]), filter(nil),
conds(nil), nl_params_([TOW.OODDD_sTS(0x7f4af53c17f0)]), batch_join=true
7 - output([TOW.OID(0x7f4af53c1210)], [TOW.OODDD_sTS(0x7f4af53c17f0)]), filter(nil)
8 - output([TOW.OID(0x7f4af53c1210)], [TOW.OODDD_sTS(0x7f4af53c17f0)]), filter(nil), is_single, dop=1
9 - output([TOW.OID(0x7f4af53c1210)], [TOW.OODDD_sTS(0x7f4af53c17f0)]), filter([TOW.CT_ID(0x7f4af53c1500) = 2208(0x7f4af53c23b0)]),
access([TOW.OID(0x7f4af53c1210)], [TOW.CT_ID(0x7f4af53c1500)], [TOW.OODDD_sTS(0x7f4af53c17f0)]), partitions(p0),
is_index_back=true, filter_before_indexback[true],
range_key([TOW.OID(0x7f4af53c1210)], [TOW.WORK_STATION(0x7f4af53f1de0)], [TOW.CT_ID(0x7f4af53c1500)], [TOW.__pk_increment(0x7f4af53ec1d0)]), range(MIN,MIN,MIN,MIN ; MAX,MAX,MAX,MAX)always true,
range_cond([? = TOW.OID(0x7f4af53c1210)(0x7f4af53c47c0)])
10 - output([remove_const(1)(0x7f4af5476100)]), filter([SD_CTM_GM.stats(0x7f4af53c1dd0) = ?(0x7f4af53c2ea0)]),
access([SD_CTM_GM.stats(0x7f4af53c1dd0)]), partitions(p0),
is_index_back=true, filter_before_indexback[false],
range_key([SD_CTM_GM.DDDC(0x7f4af53c20c0)], [SD_CTM_GM.GPID(0x7f4af53c1ae0)], [SD_CTM_GM.shadow_pk_0(0x7f4af5410640)]), range(MIN ; MAX),
range_cond([SD_CTM_GM.GPID(0x7f4af53c1ae0) = ?(0x7f4af53c3f60)], [? = SD_CTM_GM.DDDC(0x7f4af53c20c0)(0x7f4af5423680)])
可以看到执行时间43s正常这个效率,在线业务的话很难接受,因为该sql并不复杂,执行计划没有太大的问题,走了nl anti join。
后来我尝试改写掉not exist
obclient> select count(1)
-> from tttt.mmmmm_sssssale t
-> left join tttt.TTTT_OWN_C TOW
-> on T.PID = TOW.OID
-> AND TOW.CT_ID = T.city
-> AND TOW.OODDD_sTS IN
-> (SELECT DDDC
-> FROM tttt.CTM_GM
-> WHERE GPID = 'OtherThing'
-> AND stats = '1')
-> where t.sssss not in ('e111', 'ddddda')
-> and t.stats = '1'
-> and t.parean is null
-> and t.city = 2208
-> AND (t.cusystatus = 'FFFFGGGGG')
-> AND (T.ORGGGGGGNEL is null or T.ORGGGGGGNEL != 'infonow')
-> and to_char(createdate,'yyyymmdd') between 20150101 and 202301211
-> and TOW.CT_ID is null and TOW.OID is null ;
+----------+
| COUNT(1) |
+----------+
| 29487 |
+----------+
1 row in set (1.08 sec)
Query Plan: ===========================================================================================
|ID|OPERATOR |NAME |EST. ROWS|COST |
-------------------------------------------------------------------------------------------
|0 |SCALAR GROUP BY | |1 |163357|
|1 | NESTED-LOOP OUTER JOIN | |858 |163324|
|2 | TABLE SCAN |T |1329 |40619 |
|3 | PX COORDINATOR | |1 |92 |
|4 | EXCHANGE OUT DISTR |:EX10001 |1 |92 |
|5 | SUBPLAN SCAN |VIEW1 |1 |92 |
|6 | NESTED-LOOP JOIN | |1 |92 |
|7 | EXCHANGE IN DISTR | |1 |92 |
|8 | EXCHANGE OUT DISTR (BC2HOST)|:EX10000 |1 |92 |
|9 | TABLE SCAN |TOW(IDX_TTTT_OWN_C_ORDERID) |1 |92 |
|10| TABLE SCAN |SD_CTM_GM(PK_SD_CTM_GM) |1 |32 |
===========================================================================================
Outputs & filters:
-------------------------------------
0 - output([T_FUN_COUNT(*)(0x7f4fe8f96f80)]), filter(nil),
group(nil), agg_func([T_FUN_COUNT(*)(0x7f4fe8f96f80)])
1 - output([remove_const(1)(0x7f68c967e220)]), filter([(T_OP_IS, VIEW1.TOW.CT_ID(0x7f4fe8fec060), NULL, 0)(0x7f4fe8f953d0)], [(T_OP_IS, VIEW1.TOW.OID(0x7f4fe8febd70), NULL, 0)(0x7f4fe8f96480)]),
conds(nil), nl_params_([T.PID(0x7f4fe8f31660)], [T.city(0x7f4fe8f32d90)]), batch_join=false, px_batch_rescan=true
2 - output([T.PID(0x7f4fe8f31660)], [T.city(0x7f4fe8f32d90)]), filter([T.city(0x7f4fe8f32d90) = 2208(0x7f4fe8f864a0)], [cast(cast(TO_CHAR(T.CREATEDATE(0x7f4fe8f8f2e0), ?)(0x7f4fe8f8b5e0), VARCHAR2(256 BYTE))(0x7f4fe8f90270), NUMBER(-1, -85))(0x7f4fe8f90c00) >= 20150101(0x7f4fe8f8dd80)], [cast(cast(TO_CHAR(T.CREATEDATE(0x7f4fe8f8f2e0), ?)(0x7f4fe8f8cc40), VARCHAR2(256 BYTE))(0x7f4fe8f92510), NUMBER(-1, -85))(0x7f4fe8f92ea0) <= 202301211(0x7f4fe8f8e460)], [(T_OP_IS, T.ORGGGGGGNEL(0x7f4fe8f8a180), NULL, 0)(0x7f4fe8f88a70) OR T.ORGGGGGGNEL(0x7f4fe8f8a180) != ?(0x7f4fe8f899c0)(0x7f4fe8f87df0)], [(T_OP_NOT_IN, T.sssss(0x7f4fe8f835e0), (?, ?)(0x7f4fe8f82920))(0x7f4fe8f81f30)], [(T_OP_IS, T.parean(0x7f4fe8f85ac0), NULL, 0)(0x7f4fe8f850f0)], [T.stats(0x7f4fe8f84740) = ?(0x7f4fe8f84020)], [T.cusystatus(0x7f4fe8f879e0) = ?(0x7f4fe8f872c0)]),
access([T.PID(0x7f4fe8f31660)], [T.city(0x7f4fe8f32d90)], [T.sssss(0x7f4fe8f835e0)], [T.stats(0x7f4fe8f84740)], [T.parean(0x7f4fe8f85ac0)], [T.cusystatus(0x7f4fe8f879e0)], [T.ORGGGGGGNEL(0x7f4fe8f8a180)], [T.CREATEDATE(0x7f4fe8f8f2e0)]), partitions(p0),
is_index_back=false, filter_before_indexback[false,false,false,false,false,false,false,false],
range_key([T.__pk_increment(0x7f4fe90416b0)]), range(MIN ; MAX)always true
3 - output([VIEW1.TOW.OID(0x7f4fe8febd70)], [VIEW1.TOW.CT_ID(0x7f4fe8fec060)]), filter(nil)
4 - output([VIEW1.TOW.OID(0x7f4fe8febd70)], [VIEW1.TOW.CT_ID(0x7f4fe8fec060)]), filter(nil), is_single, dop=1
5 - output([VIEW1.TOW.OID(0x7f4fe8febd70)], [VIEW1.TOW.CT_ID(0x7f4fe8fec060)]), filter(nil),
access([VIEW1.TOW.OID(0x7f4fe8febd70)], [VIEW1.TOW.CT_ID(0x7f4fe8fec060)])
6 - output([TOW.OID(0x7f68c95c53a0)], [TOW.CT_ID(0x7f68c95c5690)]), filter(nil),
conds(nil), nl_params_([TOW.OODDD_sTS(0x7f68c95c5980)]), batch_join=true
7 - output([TOW.OID(0x7f68c95c53a0)], [TOW.CT_ID(0x7f68c95c5690)], [TOW.OODDD_sTS(0x7f68c95c5980)]), filter(nil)
8 - output([TOW.OID(0x7f68c95c53a0)], [TOW.CT_ID(0x7f68c95c5690)], [TOW.OODDD_sTS(0x7f68c95c5980)]), filter(nil), is_single, dop=1
9 - output([TOW.OID(0x7f68c95c53a0)], [TOW.CT_ID(0x7f68c95c5690)], [TOW.OODDD_sTS(0x7f68c95c5980)]), filter([TOW.CT_ID(0x7f68c95c5690) = 2208(0x7f68c95c6540)], [TOW.CT_ID(0x7f68c95c5690) = ?(0x7f68c95c9230)]),
access([TOW.OID(0x7f68c95c53a0)], [TOW.CT_ID(0x7f68c95c5690)], [TOW.OODDD_sTS(0x7f68c95c5980)]), partitions(p0),
is_index_back=true, filter_before_indexback[true,true],
range_key([TOW.OID(0x7f68c95c53a0)], [TOW.WORK_STATION(0x7f68c95f8780)], [TOW.CT_ID(0x7f68c95c5690)], [TOW.__pk_increment(0x7f68c95f0c50)]), range(MIN,MIN,MIN,MIN ; MAX,MAX,MAX,MAX)always true,
range_cond([? = TOW.OID(0x7f68c95c53a0)(0x7f68c95c8950)])
10 - output([remove_const(1)(0x7f68c967ea70)]), filter([SD_CTM_GM.stats(0x7f68c95c5f60) = ?(0x7f68c95c7030)]),
access([SD_CTM_GM.stats(0x7f68c95c5f60)]), partitions(p0),
is_index_back=true, filter_before_indexback[false],
range_key([SD_CTM_GM.DDDC(0x7f68c95c6250)], [SD_CTM_GM.GPID(0x7f68c95c5c70)], [SD_CTM_GM.shadow_pk_0(0x7f68c961ab30)]), range(MIN ; MAX),
range_cond([SD_CTM_GM.GPID(0x7f68c95c5c70) = ?(0x7f68c95c80f0)], [? = SD_CTM_GM.DDDC(0x7f68c95c6250)(0x7f68c962db70)])
这时候可以看到效率提升到了1s,提升了40多倍,那么原因在哪,从上图可以看到执行计划基本一样,唯一的区别是连接方式。
从ESTED-LOOP ANTI JOIN反连接转化成了NESTED-LOOP OUTER JOIN左外连接,正常理解这两种连接方式效率差距不应该这么大,从Outputs & filters信息中可以看到左外连接用了一个px_batch_rescan=true算子。
我们可以从源码的join/nl这块看到px_batch_rescan的一些信息,可以对nl做一些优化,避免扫描多余的数据。
if (OB_SUCC(ret)) {
// 当nlj条件下推做分布式rescan, 开启px batch rescan
ObNestedLoopJoinSpec &nlj = static_cast<ObNestedLoopJoinSpec &>(spec);
if (op.enable_px_batch_rescan()) {
nlj.enable_px_batch_rescan_ = true;
nlj.group_size_ = PX_RESCAN_BATCH_ROW_COUNT;
} else {
nlj.enable_px_batch_rescan_ = false;
}
}
if (OB_SUCC(ret) && PHY_NESTED_LOOP_JOIN == spec.type_) {
ObNestedLoopJoinSpec &nlj = static_cast<ObNestedLoopJoinSpec &>(spec);
bool use_batch_nlj = op.can_use_batch_nlj();
if (use_batch_nlj) {
nlj.group_rescan_ = use_batch_nlj;
}
// 左边每一行出来后,去通知右侧 GI 实施 part id 过滤,避免 PKEY NLJ 场景下扫不必要分区
if (OB_SUCC(ret) && !get_spec().enable_px_batch_rescan_ && get_spec().enable_gi_partition_pruning_) {
ObDatum *datum = nullptr;
if (OB_FAIL(get_spec().gi_partition_id_expr_->eval(eval_ctx_, datum))) {
LOG_WARN("fail eval value", K(ret));
} else {
// NOTE: 如果右侧对应多张表,这里的逻辑也没有问题
// 如 A REPART TO NLJ (B JOIN C) 的场景
// 此时 GI 在 B 和 C 的上面
int64_t part_id = datum->get_int();
ctx_.get_gi_pruning_info().set_part_id(part_id);
}
而外连接也是在该块代码里的
// outer join
if (OB_SUCC(ret)) {
if (match_right_batch_end_ && no_match_row_found_ && need_left_join()) {
need_output_row_ = true;
}
}
但是反连接是被查询改写之后的算子,该代码在rewrite的transform模块,并不能用到px_batch_rescan的算子优化。
结论:
虽然一些条件下,ob会把反连接和半连接自动改写为外连接和内连接,但是条件相对苛刻(感兴趣的同学可以翻下代码的ob_transform_join_elimination.h的介绍),现阶段遇到这类sql,可能更多的还要依赖我们开发和维护人员去手动去优化。
行之所向,莫问远方。