Skip to content

Commit

Permalink
feat(sqllab): TRINO_EXPAND_ROWS: expand columns from ROWs (apache#25809)
Browse files Browse the repository at this point in the history
  • Loading branch information
giftig authored Nov 20, 2023
1 parent 411dba2 commit 8d73ab9
Show file tree
Hide file tree
Showing 12 changed files with 268 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ const ExtraOptions = ({
/>
</div>
</StyledInputContainer>
<StyledInputContainer>
<StyledInputContainer css={no_margin_bottom}>
<div className="input-container">
<IndeterminateCheckbox
id="disable_data_preview"
Expand All @@ -220,6 +220,22 @@ const ExtraOptions = ({
/>
</div>
</StyledInputContainer>
<StyledInputContainer>
<div className="input-container">
<IndeterminateCheckbox
id="expand_rows"
indeterminate={false}
checked={!!extraJson?.schema_options?.expand_rows}
onChange={onExtraInputChange}
labelText={t('Enable row expansion in schemas')}
/>
<InfoTooltip
tooltip={t(
'For Trino, describe full schemas of nested ROW types, expanding them with dotted paths',
)}
/>
</div>
</StyledInputContainer>
</StyledExpandableForm>
</StyledInputContainer>
</Collapse.Panel>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ describe('DatabaseModal', () => {
const exposeInSQLLabCheckbox = screen.getByRole('checkbox', {
name: /expose database in sql lab/i,
});
// This is both the checkbox and it's respective SVG
// This is both the checkbox and its respective SVG
// const exposeInSQLLabCheckboxSVG = checkboxOffSVGs[0].parentElement;
const exposeInSQLLabText = screen.getByText(
/expose database in sql lab/i,
Expand Down Expand Up @@ -721,6 +721,13 @@ describe('DatabaseModal', () => {
/Disable SQL Lab data preview queries/i,
);

const enableRowExpansionCheckbox = screen.getByRole('checkbox', {
name: /enable row expansion in schemas/i,
});
const enableRowExpansionText = screen.getByText(
/enable row expansion in schemas/i,
);

// ---------- Assertions ----------
const visibleComponents = [
closeButton,
Expand All @@ -737,13 +744,15 @@ describe('DatabaseModal', () => {
checkboxOffSVGs[2],
checkboxOffSVGs[3],
checkboxOffSVGs[4],
checkboxOffSVGs[5],
tooltipIcons[0],
tooltipIcons[1],
tooltipIcons[2],
tooltipIcons[3],
tooltipIcons[4],
tooltipIcons[5],
tooltipIcons[6],
tooltipIcons[7],
exposeInSQLLabText,
allowCTASText,
allowCVASText,
Expand All @@ -754,6 +763,7 @@ describe('DatabaseModal', () => {
enableQueryCostEstimationText,
allowDbExplorationText,
disableSQLLabDataPreviewQueriesText,
enableRowExpansionText,
];
// These components exist in the DOM but are not visible
const invisibleComponents = [
Expand All @@ -764,15 +774,16 @@ describe('DatabaseModal', () => {
enableQueryCostEstimationCheckbox,
allowDbExplorationCheckbox,
disableSQLLabDataPreviewQueriesCheckbox,
enableRowExpansionCheckbox,
];
visibleComponents.forEach(component => {
expect(component).toBeVisible();
});
invisibleComponents.forEach(component => {
expect(component).not.toBeVisible();
});
expect(checkboxOffSVGs).toHaveLength(5);
expect(tooltipIcons).toHaveLength(7);
expect(checkboxOffSVGs).toHaveLength(6);
expect(tooltipIcons).toHaveLength(8);
});

test('renders the "Advanced" - PERFORMANCE tab correctly', async () => {
Expand Down
12 changes: 12 additions & 0 deletions superset-frontend/src/features/databases/DatabaseModal/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,18 @@ export function dbReducer(
}),
};
}
if (action.payload.name === 'expand_rows') {
return {
...trimmedState,
extra: JSON.stringify({
...extraJson,
schema_options: {
...extraJson?.schema_options,
[action.payload.name]: !!action.payload.value,
},
}),
};
}
return {
...trimmedState,
extra: JSON.stringify({
Expand Down
3 changes: 3 additions & 0 deletions superset-frontend/src/features/databases/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,5 +226,8 @@ export interface ExtraJson {
table_cache_timeout?: number; // in Performance
}; // No field, holds schema and table timeout
schemas_allowed_for_file_upload?: string[]; // in Security
schema_options?: {
expand_rows?: boolean;
};
version?: string;
}
19 changes: 15 additions & 4 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.orm import Session
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql import literal_column, quoted_name, text
from sqlalchemy.sql.expression import ColumnClause, Select, TextAsFrom, TextClause
from sqlalchemy.types import TypeEngine
from sqlparse.tokens import CTE
Expand Down Expand Up @@ -1322,15 +1322,21 @@ def get_table_comment(
return comment

@classmethod
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str | None
def get_columns( # pylint: disable=unused-argument
cls,
inspector: Inspector,
table_name: str,
schema: str | None,
options: dict[str, Any] | None = None,
) -> list[ResultSetColumnType]:
"""
Get all columns from a given schema and table
:param inspector: SqlAlchemy Inspector instance
:param table_name: Table name
:param schema: Schema name. If omitted, uses default schema for database
:param options: Extra options to customise the display of columns in
some databases
:return: All columns in table
"""
return convert_inspector_columns(
Expand Down Expand Up @@ -1382,7 +1388,12 @@ def where_latest_partition( # pylint: disable=too-many-arguments,unused-argumen

@classmethod
def _get_fields(cls, cols: list[ResultSetColumnType]) -> list[Any]:
return [column(c["column_name"]) for c in cols]
return [
literal_column(query_as)
if (query_as := c.get("query_as"))
else column(c["column_name"])
for c in cols
]

@classmethod
def select_star( # pylint: disable=too-many-arguments,too-many-locals
Expand Down
11 changes: 0 additions & 11 deletions superset/db_engine_specs/druid.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@
from typing import Any, TYPE_CHECKING

from sqlalchemy import types
from sqlalchemy.engine.reflection import Inspector

from superset import is_feature_enabled
from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError
from superset.exceptions import SupersetException
from superset.superset_typing import ResultSetColumnType
from superset.utils import core as utils

if TYPE_CHECKING:
Expand Down Expand Up @@ -130,15 +128,6 @@ def epoch_ms_to_dttm(cls) -> str:
"""
return "MILLIS_TO_TIMESTAMP({col})"

@classmethod
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str | None
) -> list[ResultSetColumnType]:
"""
Update the Druid type map.
"""
return super().get_columns(inspector, table_name, schema)

@classmethod
def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
# pylint: disable=import-outside-toplevel
Expand Down
8 changes: 6 additions & 2 deletions superset/db_engine_specs/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,13 @@ def handle_cursor( # pylint: disable=too-many-locals

@classmethod
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str | None
cls,
inspector: Inspector,
table_name: str,
schema: str | None,
options: dict[str, Any] | None = None,
) -> list[ResultSetColumnType]:
return BaseEngineSpec.get_columns(inspector, table_name, schema)
return BaseEngineSpec.get_columns(inspector, table_name, schema, options)

@classmethod
def where_latest_partition( # pylint: disable=too-many-arguments
Expand Down
7 changes: 6 additions & 1 deletion superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,14 +981,19 @@ def _show_columns(

@classmethod
def get_columns(
cls, inspector: Inspector, table_name: str, schema: str | None
cls,
inspector: Inspector,
table_name: str,
schema: str | None,
options: dict[str, Any] | None = None,
) -> list[ResultSetColumnType]:
"""
Get columns from a Presto data source. This includes handling row and
array data types
:param inspector: object that performs database schema inspection
:param table_name: table name
:param schema: schema name
:param options: Extra configuration options, not used by this backend
:return: a list of results that contain column info
(i.e. column name and data type)
"""
Expand Down
62 changes: 62 additions & 0 deletions superset/db_engine_specs/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@

import simplejson as json
from flask import current_app
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.url import URL
from sqlalchemy.orm import Session
from trino.sqlalchemy import datatype

from superset.constants import QUERY_CANCEL_KEY, QUERY_EARLY_CANCEL_KEY, USER_AGENT
from superset.databases.utils import make_url_safe
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError
from superset.db_engine_specs.presto import PrestoBaseEngineSpec
from superset.models.sql_lab import Query
from superset.superset_typing import ResultSetColumnType
from superset.utils import core as utils

if TYPE_CHECKING:
Expand Down Expand Up @@ -331,3 +334,62 @@ def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
return {
requests_exceptions.ConnectionError: SupersetDBAPIConnectionError,
}

@classmethod
def _expand_columns(cls, col: ResultSetColumnType) -> list[ResultSetColumnType]:
"""
Expand the given column out to one or more columns by analysing their types,
descending into ROWS and expanding out their inner fields recursively.
We can only navigate named fields in ROWs in this way, so we can't expand out
MAP or ARRAY types, nor fields in ROWs which have no name (in fact the trino
library doesn't correctly parse unnamed fields in ROWs). We won't be able to
expand ROWs which are nested underneath any of those types, either.
Expanded columns are named foo.bar.baz and we provide a query_as property to
instruct the base engine spec how to correctly query them: instead of quoting
the whole string they have to be quoted like "foo"."bar"."baz" and we then
alias them to the full dotted string for ease of reference.
"""
cols = [col]
col_type = col.get("type")

if not isinstance(col_type, datatype.ROW):
return cols

for inner_name, inner_type in col_type.attr_types:
outer_name = col["name"]
name = ".".join([outer_name, inner_name])
query_name = ".".join([f'"{piece}"' for piece in name.split(".")])
column_spec = cls.get_column_spec(str(inner_type))
is_dttm = column_spec.is_dttm if column_spec else False

inner_col = ResultSetColumnType(
name=name,
column_name=name,
type=inner_type,
is_dttm=is_dttm,
query_as=f'{query_name} AS "{name}"',
)
cols.extend(cls._expand_columns(inner_col))

return cols

@classmethod
def get_columns(
cls,
inspector: Inspector,
table_name: str,
schema: str | None,
options: dict[str, Any] | None = None,
) -> list[ResultSetColumnType]:
"""
If the "expand_rows" feature is enabled on the database via
"schema_options", expand the schema definition out to show all
subfields of nested ROWs as their appropriate dotted paths.
"""
base_cols = super().get_columns(inspector, table_name, schema, options)
if not (options or {}).get("expand_rows"):
return base_cols

return [col for base_col in base_cols for col in cls._expand_columns(base_col)]
10 changes: 9 additions & 1 deletion superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ def disable_data_preview(self) -> bool:
# this will prevent any 'trash value' strings from going through
return self.get_extra().get("disable_data_preview", False) is True

@property
def schema_options(self) -> dict[str, Any]:
"""Additional schema display config for engines with complex schemas"""
return self.get_extra().get("schema_options", {})

@property
def data(self) -> dict[str, Any]:
return {
Expand All @@ -248,6 +253,7 @@ def data(self) -> dict[str, Any]:
"allows_cost_estimate": self.allows_cost_estimate,
"allows_virtual_table_explore": self.allows_virtual_table_explore,
"explore_database_id": self.explore_database_id,
"schema_options": self.schema_options,
"parameters": self.parameters,
"disable_data_preview": self.disable_data_preview,
"parameters_schema": self.parameters_schema,
Expand Down Expand Up @@ -838,7 +844,9 @@ def get_columns(
self, table_name: str, schema: str | None = None
) -> list[ResultSetColumnType]:
with self.get_inspector_with_context() as inspector:
return self.db_engine_spec.get_columns(inspector, table_name, schema)
return self.db_engine_spec.get_columns(
inspector, table_name, schema, self.schema_options
)

def get_metrics(
self,
Expand Down
2 changes: 2 additions & 0 deletions superset/superset_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ class ResultSetColumnType(TypedDict):
scale: NotRequired[Any]
max_length: NotRequired[Any]

query_as: NotRequired[Any]


CacheConfig = dict[str, Any]
DbapiDescriptionRow = tuple[
Expand Down
Loading

0 comments on commit 8d73ab9

Please sign in to comment.