-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from VirtusLab/new_named_columns
Redesign handling of named columns
* Separate general Column supertype from data type specific Col[T]
* Remove column names from members of view-like refinements
* Rely on tuples instead of varargs in user facing APIs of methods like select, agg, groupBy
* Assign names to columns via implicit conversions
- Loading branch information
Showing 19 changed files with 314 additions and 205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
package org.virtuslab.iskra | ||
|
||
import scala.compiletime.error | ||
|
||
import org.virtuslab.iskra.types.DataType | ||
|
||
// TODO should it be contravariant or not? | ||
// Type class describing how to collect the untyped columns out of a value of type C. | ||
// `CollectedColumns` is a tuple-bounded type member that each instance fixes to the tuple type it collects. | ||
trait CollectColumns[-C]: | ||
type CollectedColumns <: Tuple | ||
// Returns the untyped columns extracted from `c`. | ||
def underlyingColumns(c: C): Seq[UntypedColumn] | ||
|
||
// Using `given ... with { ... }` syntax might sometimes break pattern matching on `CollectColumns[...] { type CollectedColumns = cc }` | ||
|
||
// Given instances of CollectColumns for a single named column, for columns with a schema, and for tuples of those. | ||
object CollectColumns: | ||
// A single named column contributes one `N := T` entry and its one untyped column. | ||
given collectNamedColumn[N <: Name, T <: DataType]: CollectColumns[NamedColumn[N, T]] with | ||
type CollectedColumns = (N := T) *: EmptyTuple | ||
def underlyingColumns(c: NamedColumn[N, T]) = Seq(c.untyped) | ||
|
||
// Columns that already carry a schema S contribute S itself and their underlying columns unchanged. | ||
given collectColumnsWithSchema[S <: Tuple]: CollectColumns[ColumnsWithSchema[S]] with | ||
type CollectedColumns = S | ||
def underlyingColumns(c: ColumnsWithSchema[S]) = c.underlyingColumns | ||
|
||
// The empty tuple contributes no columns. | ||
// NOTE(review): the type parameter S is not referenced by this instance — confirm whether it is needed. | ||
given collectEmptyTuple[S]: CollectColumns[EmptyTuple] with | ||
type CollectedColumns = EmptyTuple | ||
def underlyingColumns(c: EmptyTuple) = Seq.empty | ||
|
||
// A non-empty tuple concatenates what its head and its tail contribute, both in the CollectedColumns type and in the resulting Seq (head first). | ||
// The result type spells out the CollectedColumns refinement explicitly instead of using the `given ... with` form. | ||
given collectCons[H, T <: Tuple](using collectHead: CollectColumns[H], collectTail: CollectColumns[T]): (CollectColumns[H *: T] { type CollectedColumns = Tuple.Concat[collectHead.CollectedColumns, collectTail.CollectedColumns] }) = | ||
new CollectColumns[H *: T]: | ||
type CollectedColumns = Tuple.Concat[collectHead.CollectedColumns, collectTail.CollectedColumns] | ||
def underlyingColumns(c: H *: T) = collectHead.underlyingColumns(c.head) ++ collectTail.underlyingColumns(c.tail) | ||
|
||
|
||
// Exception whose message names the type for which no CollectColumns instance could be found. | ||
// TODO Customize error message for different operations with an explanation | ||
class CannotCollectColumns(typeName: String) | ||
extends Exception(s"Could not find an instance of CollectColumns for ${typeName}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,94 @@ | ||
package org.virtuslab.iskra | ||
|
||
import scala.language.implicitConversions | ||
|
||
import scala.quoted.* | ||
|
||
import org.apache.spark.sql.{Column => UntypedColumn} | ||
import types.DataType | ||
import MacroHelpers.TupleSubtype | ||
|
||
// Wrapper around an untyped column (UntypedColumn is the alias this file imports for org.apache.spark.sql.Column). | ||
class Column(val untyped: UntypedColumn): | ||
// Returns the value carried by the ValueOf[Name] instance found in scope. | ||
inline def name(using v: ValueOf[Name]): Name = v.value | ||
|
||
// Implicit Col-to-NamedColumn conversion, aliasing helpers and binary operators for Col. | ||
object Column: | ||
// Implicit conversion from a Col to a NamedColumn; the work is done at compile time by the macro below. | ||
implicit transparent inline def columnToNamedColumn(inline col: Col[?]): NamedColumn[?, ?] = | ||
${ columnToNamedColumnImpl('col) } | ||
|
||
// Macro implementation: derives a name from the shape of the `col` expression and aliases the untyped column with it. | ||
// NOTE(review): the nested matches below have no fallback case, so other expression shapes are not handled here — confirm the intended failure mode. | ||
private def columnToNamedColumnImpl(col: Expr[Col[?]])(using Quotes): Expr[NamedColumn[?, ?]] = | ||
import quotes.reflect.* | ||
col match | ||
// `col` was obtained through selectDynamic on a StructuralSchemaView: reuse the selected name as the alias. | ||
case '{ ($v: StructuralSchemaView).selectDynamic($nm: Name).$asInstanceOf$[Col[tp]] } => | ||
nm.asTerm.tpe.asType match | ||
case '[Name.Subtype[n]] => | ||
'{ NamedColumn[n, tp](${ col }.untyped.as(${ nm })) } | ||
// `col` is an inlined plain identifier: use the identifier's own name as the alias. | ||
case '{ $c: Col[tp] } => | ||
col.asTerm match | ||
case Inlined(_, _, Ident(name)) => | ||
ConstantType(StringConstant(name)).asType match | ||
case '[Name.Subtype[n]] => | ||
val alias = Literal(StringConstant(name)).asExprOf[Name] | ||
'{ NamedColumn[n, tp](${ col }.untyped.as(${ alias })) } | ||
|
||
// `as` and `alias` have identical bodies: both alias the untyped column with `name` and wrap it in a NamedColumn[N, T]. | ||
extension [T <: DataType](col: Col[T]) | ||
inline def as[N <: Name](name: N): NamedColumn[N, T] = | ||
NamedColumn[N, T](col.untyped.as(name)) | ||
inline def alias[N <: Name](name: N): NamedColumn[N, T] = | ||
NamedColumn[N, T](col.untyped.as(name)) | ||
|
||
// Binary operators: each one delegates to the ColumnOp instance found for the operand types, and that instance's Out member is the result column's data type. | ||
extension [T1 <: DataType](col1: Col[T1]) | ||
inline def +[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Plus[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def -[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Minus[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def *[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Mult[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def /[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Div[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def ++[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.PlusPlus[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def <[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Lt[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def <=[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Le[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def >[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Gt[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def >=[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Ge[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def ===[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Eq[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def =!=[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Ne[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def &&[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.And[T1, T2]): Col[op.Out] = op(col1, col2) | ||
inline def ||[T2 <: DataType](col2: Col[T2])(using op: ColumnOp.Or[T1, T2]): Col[op.Out] = op(col1, col2) | ||
|
||
|
||
// A Column that additionally tracks its data type T (covariantly) at the type level. | ||
class Col[+T <: DataType](untyped: UntypedColumn) extends Column(untyped) | ||
|
||
sealed trait NamedColumns[Schema](val underlyingColumns: Seq[UntypedColumn]) | ||
|
||
object Columns: | ||
transparent inline def apply(inline columns: NamedColumns[?]*): NamedColumns[?] = ${ applyImpl('columns) } | ||
transparent inline def apply[C <: NamedColumns](columns: C): ColumnsWithSchema[?] = ${ applyImpl('columns) } | ||
|
||
private def applyImpl(columns: Expr[Seq[NamedColumns[?]]])(using Quotes): Expr[NamedColumns[?]] = | ||
private def applyImpl[C : Type](columns: Expr[C])(using Quotes): Expr[ColumnsWithSchema[?]] = | ||
import quotes.reflect.* | ||
|
||
val columnValuesWithTypes = columns match | ||
case Varargs(colExprs) => | ||
colExprs.map { arg => | ||
arg match | ||
case '{ $value: NamedColumns[schema] } => ('{ ${ value }.underlyingColumns }, Type.of[schema]) | ||
} | ||
Expr.summon[CollectColumns[C]] match | ||
case Some(collectColumns) => | ||
collectColumns match | ||
case '{ $cc: CollectColumns[?] { type CollectedColumns = collectedColumns } } => | ||
Type.of[collectedColumns] match | ||
case '[TupleSubtype[collectedCols]] => | ||
'{ | ||
val cols = ${ cc }.underlyingColumns(${ columns }) | ||
ColumnsWithSchema[collectedCols](cols) | ||
} | ||
case None => | ||
throw CollectColumns.CannotCollectColumns(Type.show[C]) | ||
|
||
val columnsValues = columnValuesWithTypes.map(_._1) | ||
val columnsTypes = columnValuesWithTypes.map(_._2) | ||
|
||
val schemaTpe = FrameSchema.schemaTypeFromColumnsTypes(columnsTypes) | ||
trait NamedColumnOrColumnsLike | ||
|
||
schemaTpe match | ||
case '[s] => | ||
'{ | ||
val cols = ${ Expr.ofSeq(columnsValues) }.flatten | ||
new NamedColumns[s](cols) {} | ||
} | ||
type NamedColumns = Repeated[NamedColumnOrColumnsLike] | ||
|
||
class Column[+T <: DataType](val untyped: UntypedColumn): | ||
class NamedColumn[N <: Name, T <: DataType](val untyped: UntypedColumn) | ||
extends NamedColumnOrColumnsLike | ||
|
||
inline def name(using v: ValueOf[Name]): Name = v.value | ||
class ColumnsWithSchema[Schema <: Tuple](val underlyingColumns: Seq[UntypedColumn]) extends NamedColumnOrColumnsLike | ||
|
||
object Column: | ||
extension [T <: DataType](col: Column[T]) | ||
inline def as[N <: Name](name: N)(using v: ValueOf[N]): LabeledColumn[N, T] = | ||
LabeledColumn[N, T](col.untyped.as(v.value)) | ||
inline def alias[N <: Name](name: N)(using v: ValueOf[N]): LabeledColumn[N, T] = | ||
LabeledColumn[N, T](col.untyped.as(v.value)) | ||
|
||
extension [T1 <: DataType](col1: Column[T1]) | ||
inline def +[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Plus[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def -[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Minus[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def *[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Mult[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def /[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Div[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def ++[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.PlusPlus[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def <[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Lt[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def <=[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Le[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def >[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Gt[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def >=[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Ge[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def ===[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Eq[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def =!=[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Ne[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def &&[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.And[T1, T2]): Column[op.Out] = op(col1, col2) | ||
inline def ||[T2 <: DataType](col2: Column[T2])(using op: ColumnOp.Or[T1, T2]): Column[op.Out] = op(col1, col2) | ||
|
||
@annotation.showAsInfix | ||
class :=[L <: LabeledColumn.Label, T <: DataType](untyped: UntypedColumn) | ||
extends Column[T](untyped) | ||
with NamedColumns[(L := T) *: EmptyTuple](Seq(untyped)) | ||
trait :=[L <: ColumnLabel, T <: DataType] | ||
|
||
@annotation.showAsInfix | ||
trait /[+Prefix <: Name, +Suffix <: Name] | ||
|
||
type LabeledColumn[L <: LabeledColumn.Label, T <: DataType] = :=[L, T] | ||
|
||
object LabeledColumn: | ||
type Label = Name | (Name / Name) | ||
def apply[L <: LabeledColumn.Label, T <: DataType](untyped: UntypedColumn) = new :=[L, T](untyped) | ||
type ColumnLabel = Name | (Name / Name) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package org.virtuslab.iskra | ||
|
||
// Union of a single A and the homogeneous tuples of A with arities 2 through 22. | ||
type Repeated[A] = | ||
A | ||
| (A, A) | ||
| (A, A, A) | ||
| (A, A, A, A) | ||
| (A, A, A, A, A) | ||
| (A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) | ||
| (A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A) // 22 is maximal arity |
Oops, something went wrong.