Skip to content

Commit

Permalink
[MOREL-231] Add distinct keyword as shorthand for group with all …
Browse files Browse the repository at this point in the history
…fields and no aggregate functions

Add `distinct` keyword as shorthand for `group` with all fields and
no aggregate functions. For example, the following are
equivalent:

  (*) Using `distinct`
  from e in scott.emp
    yield {e.job, e.deptno}
    distinct;

  (*) Using `group` with no aggregate functions
  from e in scott.emp
    yield {e.job, e.deptno}
    group job, deptno;

This is analogous to how, in SQL,

  SELECT DISTINCT job, deptno FROM emp

is equivalent to

  SELECT job, deptno FROM emp GROUP BY job, deptno

SQL has a complication: you are allowed to use DISTINCT in the
same SELECT block as a GROUP BY, and it is applied after
aggregate functions are evaluated. Thankfully Morel does not
have to worry about SELECT blocks.

The implementation adds a `Distinct` class to the AST but
desugars it to `Group` in the core algebra.

Fixes #231
  • Loading branch information
julianhyde committed Dec 13, 2024
1 parent da52b1e commit b10fe6e
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 2 deletions.
3 changes: 2 additions & 1 deletion docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ just because they take effort to build.
Contributions are welcome!

In Morel but not Standard ML:
* `from` expression with `in`, `join`, `where`, `group`,
* `from` expression with `in`, `join`, `where`, `distinct`, `group`,
`compute`, `into`, `order`, `skip`, `take`, `through`, `yield` clauses
* `union`, `except`, `intersect`, `elem`, `notelem` operators
* "*lab* `=`" is optional in `exprow`
Expand Down Expand Up @@ -153,6 +153,7 @@ In Standard ML but not in Morel:
<i>step</i> &rarr; <b>where</b> <i>exp</i> filter clause
| <b>join</b> <i>scan<sub>1</sub></i> [ <b>,</b> ... <b>,</b> <i>scan<sub>s</sub></i> ]
join clause
| <b>distinct</b> distinct clause
| <b>group</b> <i>groupKey<sub>1</sub></i> <b>,</b> ... <b>,</b> <i>groupKey<sub>g</sub></i>
[ <b>compute</b> <i>agg<sub>1</sub></i> <b>,</b> ... <b>,</b> <i>agg<sub>a</sub></i> ]
group clause (<i>g</i> &ge; 0, <i>a</i> &ge; 1)
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/net/hydromatic/morel/ast/Ast.java
Original file line number Diff line number Diff line change
Expand Up @@ -1687,6 +1687,25 @@ public Scan copy(Pat pat, @Nullable Exp exp, @Nullable Exp condition) {
}
}

/**
 * A {@code distinct} clause in a {@code from} expression.
 *
 * <p>The clause carries no expressions of its own; the only state is the
 * position passed to the constructor.
 */
public static class Distinct extends FromStep {
  /** Creates a step at the given position, tagged with {@link Op#DISTINCT}. */
  Distinct(Pos pos) {
    super(pos, Op.DISTINCT);
  }

  /** Dispatches to the shuttle's {@code visit} method for this node type. */
  @Override public AstNode accept(Shuttle shuttle) {
    return shuttle.visit(this);
  }

  /** Dispatches to the visitor's {@code visit} method for this node type. */
  @Override public void accept(Visitor visitor) {
    visitor.visit(this);
  }

  /**
   * Writes this clause as the keyword preceded by a single space.
   *
   * <p>The {@code left} and {@code right} arguments are not used.
   */
  @Override AstWriter unparse(AstWriter writer, int left, int right) {
    return writer.append(" distinct");
  }
}

/** A {@code where} clause in a {@code from} expression. */
public static class Where extends FromStep {
public final Exp exp;
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/net/hydromatic/morel/ast/AstBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,10 @@ public Ast.FromStep where(Pos pos, Ast.Exp exp) {
return new Ast.Where(pos, exp);
}

/**
 * Creates a {@code distinct} step of a {@code from} expression.
 *
 * @param pos position to record on the new node
 * @return a new {@link Ast.Distinct}
 */
public Ast.FromStep distinct(Pos pos) {
return new Ast.Distinct(pos);
}

public Ast.FromStep skip(Pos pos, Ast.Exp exp) {
return new Ast.Skip(pos, exp);
}
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/net/hydromatic/morel/ast/FromBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Range;
import org.apache.calcite.util.Util;
Expand Down Expand Up @@ -239,6 +240,13 @@ public FromBuilder take(Core.Exp count) {
return addStep(core.take(bindings, count));
}

/**
 * Appends a {@code distinct} step.
 *
 * <p>No dedicated core step is created. The step that is added is a
 * {@code group} whose keys are all of the current bindings, each mapped to
 * a reference to itself, and which has no aggregate functions.
 *
 * @return the builder returned by {@code addStep}
 */
public FromBuilder distinct() {
  final ImmutableSortedMap.Builder<Core.IdPat, Core.Exp> keys =
      ImmutableSortedMap.naturalOrder();
  // One group key per binding: "id = id".
  bindings.forEach(binding ->
      keys.put((Core.IdPat) binding.id, core.id(binding.id)));
  final ImmutableSortedMap<Core.IdPat, Core.Exp> groupKeys = keys.build();
  // An empty aggregate map makes the group step a pure de-duplication.
  return addStep(core.group(groupKeys, ImmutableSortedMap.of()));
}

public FromBuilder group(SortedMap<Core.IdPat, Core.Exp> groupExps,
SortedMap<Core.IdPat, Core.Aggregate> aggregates) {
return addStep(core.group(groupExps, aggregates));
Expand Down
1 change: 1 addition & 0 deletions src/main/java/net/hydromatic/morel/ast/Op.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public enum Op {
CASE,
FROM,
SCAN(" "),
DISTINCT,
WHERE,
GROUP,
COMPUTE,
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/net/hydromatic/morel/ast/Shuttle.java
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,10 @@ protected AstNode visit(Ast.Where where) {
return ast.where(where.pos, where.exp.accept(this));
}

/**
 * Visits a {@code distinct} step.
 *
 * <p>Returns the node itself rather than a copy: there are no child nodes
 * to pass through this shuttle.
 */
protected AstNode visit(Ast.Distinct distinct) {
return distinct;
}

protected AstNode visit(Ast.Skip skip) {
return ast.skip(skip.pos, skip.exp.accept(this));
}
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/net/hydromatic/morel/ast/Visitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ protected void visit(Ast.OrderItem orderItem) {
orderItem.exp.accept(this);
}

/** Visits a {@code distinct} step; does nothing by default. */
protected void visit(Ast.Distinct distinct) {
// Intentionally empty: no child node is visited for this step.
}

protected void visit(Ast.Where where) {
where.exp.accept(this);
}
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/net/hydromatic/morel/compile/Resolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,11 @@ private Core.Exp run(List<Ast.FromStep> steps) {
r.toCore(aggregate, groupExps.keySet())));
fromBuilder.group(groupExps, aggregates.build());
}
}

/** Handles a {@code distinct} step by calling {@code fromBuilder.distinct()}. */
@Override protected void visit(Ast.Distinct distinct) {
// The AST node carries no expressions to convert, so the call takes no arguments.
fromBuilder.distinct();
}
}
}

// End Resolver.java
5 changes: 5 additions & 0 deletions src/main/java/net/hydromatic/morel/compile/TypeResolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,11 @@ private Pair<TypeEnv, Unifier.Variable> deduceStepType(TypeEnv env,
fromSteps.add(where.copy(filter2));
return Pair.of(env2, v);

case DISTINCT:
final Ast.Distinct distinct = (Ast.Distinct) step;
fromSteps.add(distinct);
return Pair.of(env2, v);

case SKIP:
final Ast.Skip skip = (Ast.Skip) step;
final Unifier.Variable v11 = unifier.variable();
Expand Down
5 changes: 5 additions & 0 deletions src/main/javacc/MorelParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,10 @@ void fromStep(List<FromStep> steps) :
<COMPUTE> { span = Span.of(pos()); } aggregates = aggregateCommaList() {
steps.add(ast.compute(span.end(this), aggregates));
}
|
<DISTINCT> {
steps.add(ast.distinct(pos()));
}
|
<INTO> { span = Span.of(pos()); } intoExp = expression() {
steps.add(ast.into(span.end(this), intoExp));
Expand Down Expand Up @@ -1452,6 +1456,7 @@ AstNode statementEof() :
// The following are relational extensions:
| < COMPUTE: "compute" >
| < DESC: "desc" >
| < DISTINCT: "distinct" >
| < FROM: "from" >
| < GROUP: "group" >
| < INTO: "into" >
Expand Down
26 changes: 26 additions & 0 deletions src/test/java/net/hydromatic/morel/FromBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ FromBuilder fromBuilder() {
assertThat(e2, is(e));
}

/**
 * Checks how {@code FromBuilder.distinct()} is rendered: as a
 * {@code group} over every binding in scope, with no aggregates.
 */
@Test void testDistinct() {
  final Fixture fixture = new Fixture();
  final FromBuilder builder = fixture.fromBuilder();

  // Case 1, a single scan:
  //   from i in [1, 2] distinct
  builder.scan(fixture.iPat, fixture.list12);
  builder.distinct();
  final Core.From singleScan = builder.build();
  assertThat(singleScan, hasToString("from i in [1, 2] group i = i"));

  // Case 2, two scans with a filter after the distinct step:
  //   from i in [1, 2],
  //     j in [3, 4]
  //     distinct
  //     where i < j
  // The same builder is reused after being cleared.
  builder.clear();
  builder.scan(fixture.iPat, fixture.list12);
  builder.scan(fixture.jPat, fixture.list34);
  builder.distinct();
  builder.where(
      core.lessThan(fixture.typeSystem, fixture.iId, fixture.jId));
  final Core.From twoScans = builder.build();
  final String expected = "from i in [1, 2] join j in [3, 4] "
      + "group i = i, j = j where i < j";
  assertThat(twoScans, hasToString(expected));
}

@Test void testWhereOrder() {
// from i in [1, 2] where i < 2 order i desc
// ==>
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/net/hydromatic/morel/MainTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1738,6 +1738,8 @@ private static List<Object> node(Object... args) {
}
ml("from e in (from z in emps), d in (from y in depts) on c")
.assertParseSame();
ml("from e in emps distinct").assertParseSame();
ml("from e in emps distinct where deptno > 10").assertParseSame();
ml("from e in emps\n"
+ " group e.deptno\n"
+ " join d in depts on deptno = d.deptno\n"
Expand Down
42 changes: 42 additions & 0 deletions src/test/resources/script/relational.smli
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ from e in emps
order d;
> val it = [10,20,30] : int list

(*) singleton record 'yield' followed by 'distinct'
from e in emps
yield {d = e.deptno}
distinct
order d;
> val it = [10,20,30] : int list

(*) singleton record 'yield' followed by 'group'
from e in emps
yield {d = e.deptno}
Expand Down Expand Up @@ -309,6 +316,30 @@ from e in emps
end;
> val it = [200,202,204,206] : int list

(*) 'distinct' is equivalent to 'group' with all fields
(*) Query 1, using `distinct`
from e in scott.emp
yield {e.job, e.deptno}
distinct;
> val it =
> [{deptno=10,job="MANAGER"},{deptno=10,job="PRESIDENT"},
> {deptno=20,job="CLERK"},{deptno=30,job="MANAGER"},{deptno=20,job="ANALYST"},
> {deptno=30,job="SALESMAN"},{deptno=30,job="CLERK"},
> {deptno=20,job="MANAGER"},{deptno=10,job="CLERK"}]
> : {deptno:int, job:string} list

(*) Query 2, using `group` with no aggregate functions, equivalent to query 1
from e in scott.emp
yield {e.job, e.deptno}
group job, deptno;
> val it =
> [{deptno=10,job="MANAGER"},{deptno=10,job="PRESIDENT"},
> {deptno=20,job="CLERK"},{deptno=30,job="MANAGER"},{deptno=20,job="ANALYST"},
> {deptno=30,job="SALESMAN"},{deptno=30,job="CLERK"},
> {deptno=20,job="MANAGER"},{deptno=10,job="CLERK"}]
> : {deptno:int, job:string} list


(*) Function defined inside query
from e in emps
where e.deptno < 30
Expand Down Expand Up @@ -506,6 +537,17 @@ from e in emps
join d in depts on deptno = d.deptno;
*)

(*) 'join' followed by 'distinct'
from e in emps
join d in depts on e.deptno = d.deptno
distinct;
> val it =
> [{d={deptno=10,name="Sales"},e={deptno=10,id=100,name="Fred"}},
> {d={deptno=20,name="HR"},e={deptno=20,id=101,name="Velma"}},
> {d={deptno=30,name="Engineering"},e={deptno=30,id=102,name="Shaggy"}},
> {d={deptno=30,name="Engineering"},e={deptno=30,id=103,name="Scooby"}}]
> : {d:{deptno:int, name:string}, e:{deptno:int, id:int, name:string}} list

(*) exists (defining the "exists" function ourselves)
(*) and correlated sub-query
let
Expand Down

0 comments on commit b10fe6e

Please sign in to comment.