From 72eb56a31819815aa61a46f64fe18b59a8c38b2e Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Wed, 27 Sep 2023 09:29:26 -0400 Subject: [PATCH 1/2] Remove unnecessary sorts before joins in ztests Remove sort operators before joins in ztests that are unnecessary now that join automatically sorts its inputs. compiler/ztests/join-desc.yaml is left unmodified since it is explicitly testing inputs with descending order. zfmt/ztests/decls.yaml is modified for consistency, even though removing the sorts affects the output (which is formatted Zed, not query results). --- compiler/ztests/from-pass.yaml | 5 +++-- .../ztests/language-directed-acyclic-flow-graphs-2.yaml | 4 ++-- lake/ztests/cross-pool-join.yaml | 4 ++-- lake/ztests/index/apply.yaml | 4 ++-- lake/ztests/index/update.yaml | 4 ++-- runtime/op/join/ztests/cut-like.yaml | 4 ++-- runtime/op/join/ztests/kinds.yaml | 8 ++++---- service/ztests/index/apply.yaml | 4 ++-- service/ztests/index/update.yaml | 4 ++-- zfmt/ztests/decls.yaml | 8 ++------ 10 files changed, 23 insertions(+), 26 deletions(-) diff --git a/compiler/ztests/from-pass.yaml b/compiler/ztests/from-pass.yaml index 38fbec79a9..6a7f67464f 100644 --- a/compiler/ztests/from-pass.yaml +++ b/compiler/ztests/from-pass.yaml @@ -4,8 +4,8 @@ inputs: - name: join.zed data: | * | from ( - pass => sort x - file right.zson => sort y + pass + file right.zson ) | inner join on x=y matched:=true - name: left.zson data: | @@ -15,6 +15,7 @@ inputs: - name: right.zson data: | {y:2,y:"y-two"} + outputs: - name: stdout data: | diff --git a/docs/language/ztests/language-directed-acyclic-flow-graphs-2.yaml b/docs/language/ztests/language-directed-acyclic-flow-graphs-2.yaml index 03406272a9..9887aefeff 100644 --- a/docs/language/ztests/language-directed-acyclic-flow-graphs-2.yaml +++ b/docs/language/ztests/language-directed-acyclic-flow-graphs-2.yaml @@ -43,8 +43,8 @@ inputs: - name: join.zed data: | from ( - pool PoolOne => put color:=upper(color) | put namelen:=len(key) | sort key - pool PoolTwo => put price:=price+0.5 | put tag:="mytag" | sort key + pool PoolOne => put color:=upper(color) | put namelen:=len(key) + pool PoolTwo => put price:=price+0.5 | put tag:="mytag" ) | join on key=key priceinfo:=this | drop priceinfo.key outputs: diff --git a/lake/ztests/cross-pool-join.yaml b/lake/ztests/cross-pool-join.yaml index 985503622f..63bacf402c 100644 --- a/lake/ztests/cross-pool-join.yaml +++ b/lake/ztests/cross-pool-join.yaml @@ -24,8 +24,8 @@ inputs: - name: join.zed data: | from ( - pool fruit => cut f:=this | sort f.flavor - pool person => cut p:=this | sort p.likes + pool fruit => cut f:=this + pool person => cut p:=this ) | join on f.flavor=p.likes p:=p | summarize likes:=union(f.name) by name:=p.name | sort name diff --git a/lake/ztests/index/apply.yaml b/lake/ztests/index/apply.yaml index 8eb296c835..14a485ad58 100644 --- a/lake/ztests/index/apply.yaml +++ b/lake/ztests/index/apply.yaml @@ -18,8 +18,8 @@ inputs: - name: query.zed data: | from ( - pool :index_rules => sort id - pool test@main:indexes => sort rule.id | cut o:=this + pool :index_rules + pool test@main:indexes => cut o:=this ) | left join on id = o.rule.id o | count(o) by name, fields diff --git a/lake/ztests/index/update.yaml b/lake/ztests/index/update.yaml index 57b1975c7a..ad5dd3ab01 100644 --- a/lake/ztests/index/update.yaml +++ b/lake/ztests/index/update.yaml @@ -23,8 +23,8 @@ inputs: - name: query.zed data: | from ( - pool :index_rules => sort id - pool test@main:indexes => sort rule.id | cut o:=this + pool :index_rules + pool test@main:indexes => cut o:=this ) | left join on id = o.rule.id o | count(o) by name,fields diff --git a/runtime/op/join/ztests/cut-like.yaml b/runtime/op/join/ztests/cut-like.yaml index f7bd333dab..ef5aa5b233 100644 --- a/runtime/op/join/ztests/cut-like.yaml +++ b/runtime/op/join/ztests/cut-like.yaml @@ -4,8 +4,8 @@ inputs: - name: test.zed data: | from ( - file people.ndjson => sort likes - file fruit.ndjson => sort flavor + file people.ndjson + file fruit.ndjson ) | left join on likes=flavor fruit:=name,note:=quiet(note) - name: people.ndjson data: | diff --git a/runtime/op/join/ztests/kinds.yaml b/runtime/op/join/ztests/kinds.yaml index 250d44f31e..c1eb324e78 100644 --- a/runtime/op/join/ztests/kinds.yaml +++ b/runtime/op/join/ztests/kinds.yaml @@ -1,12 +1,12 @@ script: | echo === ANTI === - zq -z 'anti join (file B.zson) on a=b | sort a' A.zson + zq -z 'anti join (file B.zson) on a=b' A.zson echo === LEFT === - zq -z 'left join (file B.zson) on a=b hit:=sb | sort a' A.zson + zq -z 'left join (file B.zson) on a=b hit:=sb' A.zson echo === INNER === - zq -z 'inner join (file B.zson) on a=b hit:=sb | sort a' A.zson + zq -z 'inner join (file B.zson) on a=b hit:=sb' A.zson echo === RIGHT === - zq -z 'right join (file C.zson) on b=c hit:=sb | sort c' B.zson + zq -z 'right join (file C.zson) on b=c hit:=sb' B.zson inputs: - name: A.zson diff --git a/service/ztests/index/apply.yaml b/service/ztests/index/apply.yaml index 5e450f272d..fcf7f99ccd 100644 --- a/service/ztests/index/apply.yaml +++ b/service/ztests/index/apply.yaml @@ -19,8 +19,8 @@ inputs: - name: query.zed data: | from ( - pool :index_rules => sort id - pool test@main:indexes => sort rule.id | cut o:=this + pool :index_rules + pool test@main:indexes => cut o:=this ) | left join on id = o.rule.id o | count(o) by name, fields diff --git a/service/ztests/index/update.yaml b/service/ztests/index/update.yaml index 23afdf0f76..3994c70943 100644 --- a/service/ztests/index/update.yaml +++ b/service/ztests/index/update.yaml @@ -24,8 +24,8 @@ inputs: - name: query.zed data: | from ( - pool :index_rules => sort id - pool test@main:indexes => sort rule.id | cut o:=this + pool :index_rules + pool test@main:indexes => cut o:=this ) | left join on id = o.rule.id o | count(o) by name,fields diff --git a/zfmt/ztests/decls.yaml b/zfmt/ztests/decls.yaml index eec7700923..78753b200f 100644 --- a/zfmt/ztests/decls.yaml +++ b/zfmt/ztests/decls.yaml @@ -13,9 +13,9 @@ inputs: op stamp(assignee): ( yield {...this, assignee, ts: now()} ) op nop(foo): ( pass ) op joinTest(left_file, right_file, left_key, right_key, left_dest, right_source): ( - file left_file | sort left_key + file left_file | inner join ( - file right_file | sort right_key + file right_file ) on left_key = right_key left_dest := right_source ) joinTest("fruit.ndjson", "people.ndjson", flavor, likes, eater, name) @@ -42,12 +42,10 @@ outputs: from ( file left_file ) - | sort left_key | join ( from ( file right_file ) - | sort right_key ) on left_key=right_key left_dest:=right_source ) joinTest("fruit.ndjson", "people.ndjson", flavor, likes, eater, name) @@ -64,13 +62,11 @@ outputs: ) file fruit.ndjson - | sort flavor | fork ( => pass => file people.ndjson - | sort likes ) | join on flavor=likes eater:=name | yield {...this,assignee:"bob",ts:now()} From 57116e9f92e4727e8bcbb310cbc366cd0af386b7 Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Thu, 28 Sep 2023 10:55:22 -0400 Subject: [PATCH 2/2] Revert runtime/op/join/ztests/kinds.yaml --- runtime/op/join/ztests/kinds.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/op/join/ztests/kinds.yaml b/runtime/op/join/ztests/kinds.yaml index c1eb324e78..250d44f31e 100644 --- a/runtime/op/join/ztests/kinds.yaml +++ b/runtime/op/join/ztests/kinds.yaml @@ -1,12 +1,12 @@ script: | echo === ANTI === - zq -z 'anti join (file B.zson) on a=b' A.zson + zq -z 'anti join (file B.zson) on a=b | sort a' A.zson echo === LEFT === - zq -z 'left join (file B.zson) on a=b hit:=sb' A.zson + zq -z 'left join (file B.zson) on a=b hit:=sb | sort a' A.zson echo === INNER === - zq -z 'inner join (file B.zson) on a=b hit:=sb' A.zson + zq -z 'inner join (file B.zson) on a=b hit:=sb | sort a' A.zson echo === RIGHT === - zq -z 'right join (file C.zson) on b=c hit:=sb' B.zson + zq -z 'right join (file C.zson) on b=c hit:=sb | sort c' B.zson inputs: - name: A.zson