diff --git a/backend/zeno_backend/classes/metadata.py b/backend/zeno_backend/classes/metadata.py index 055f1608..a3ced981 100644 --- a/backend/zeno_backend/classes/metadata.py +++ b/backend/zeno_backend/classes/metadata.py @@ -10,6 +10,9 @@ class HistogramBucket(CamelModel): bucket: float | bool | int | str bucket_end: float | bool | int | str | None = None + size: int | None = None + filtered_size: int | None = None + metric: float | None = None class HistogramColumnRequest(CamelModel): diff --git a/backend/zeno_backend/database/select.py b/backend/zeno_backend/database/select.py index b7a8a351..d79b0a48 100644 --- a/backend/zeno_backend/database/select.py +++ b/backend/zeno_backend/database/select.py @@ -677,7 +677,7 @@ def table_data_paginated( db.cur.execute( sql.SQL("SELECT * FROM {} WHERE ").format(sql.Identifier(project)) + filter_sql - + sql.SQL("ORDER BY {} {} LIMIT {} OFFSET {};").format( + + sql.SQL(" ORDER BY {} {} LIMIT {} OFFSET {};").format( sql.Identifier(sort_by[0].id if sort_by[0] else "data_id"), sql.SQL("DESC" if sort_by[1] else "ASC"), sql.Literal(limit), diff --git a/backend/zeno_backend/database/util.py b/backend/zeno_backend/database/util.py index b046c173..365058de 100644 --- a/backend/zeno_backend/database/util.py +++ b/backend/zeno_backend/database/util.py @@ -18,6 +18,8 @@ def resolve_metadata_type(data_frame: pd.DataFrame, column: str) -> MetadataType """ dtype = data_frame[column].dtype if pd.api.types.is_any_real_numeric_dtype(dtype): + if data_frame[column].nunique() < 20: + return MetadataType.NOMINAL return MetadataType.CONTINUOUS elif pd.api.types.is_bool_dtype(dtype): return MetadataType.BOOLEAN diff --git a/backend/zeno_backend/processing/filtering.py b/backend/zeno_backend/processing/filtering.py index 55275796..34149733 100644 --- a/backend/zeno_backend/processing/filtering.py +++ b/backend/zeno_backend/processing/filtering.py @@ -34,18 +34,15 @@ def filter_to_sql( + sql.SQL(")") ) else: - try: - val = str(float(f.value)) - 
except ValueError: - if str(f.value).lower() in [ - "true", - "false", - ]: - val = "True" if str(f.value).lower() == "true" else "False" - else: - val = str(f.value) - if f.operation == Operation.LIKE or f.operation == Operation.ILIKE: - val = "%" + val + "%" + if str(f.value).lower() in [ + "true", + "false", + ]: + val = "True" if str(f.value).lower() == "true" else "False" + else: + val = str(f.value) + if f.operation == Operation.LIKE or f.operation == Operation.ILIKE: + val = "%" + val + "%" column_id = ( f.column.id if f.column.model is None or model is None diff --git a/backend/zeno_backend/processing/histogram_processing.py b/backend/zeno_backend/processing/histogram_processing.py index f4e30bcc..de57fb4b 100644 --- a/backend/zeno_backend/processing/histogram_processing.py +++ b/backend/zeno_backend/processing/histogram_processing.py @@ -1,4 +1,7 @@ """Functions for creating the frontend metadata histograms.""" + +import asyncio + import numpy as np import pandas as pd from psycopg import sql @@ -6,14 +9,62 @@ from zeno_backend.classes.base import MetadataType, ZenoColumn from zeno_backend.classes.metadata import ( HistogramBucket, + HistogramColumnRequest, HistogramRequest, ) from zeno_backend.database.select import column, project_from_uuid -from zeno_backend.processing.filtering import bucket_filter, filter_to_sql, table_filter +from zeno_backend.processing.filtering import bucket_filter, table_filter from zeno_backend.processing.metrics.map import metric_map -def histogram_buckets( +async def histogram_bucket(project_uuid: str, col: ZenoColumn, num_bins: int | str): + """Calculate the histogram buckets for a single column. + + Args: + project_uuid (str): the project the user is currently working with. + col (ZenoColumn): the column to compute buckets for. + num_bins (int | str): bin count or numpy binning strategy for the histogram. + + + Returns: + List[HistogramBucket]: the buckets for the column histogram. 
+ """ + col_list = column(project_uuid, col) + df_col: pd.Series = pd.DataFrame({"col": col_list})["col"] + if col.data_type == MetadataType.NOMINAL: + ret_hist: list[HistogramBucket] = [] + val_counts: pd.Series = df_col.value_counts() + if len(val_counts) > 30: + return [] + else: + for k in val_counts.keys(): # type: ignore + ret_hist.append(HistogramBucket(bucket=k)) + return ret_hist + elif col.data_type == MetadataType.CONTINUOUS: + ret_hist: list[HistogramBucket] = [] + df_col = pd.to_numeric(df_col).fillna(0) # type: ignore + bins = np.histogram_bin_edges(df_col, bins=num_bins) + for i in range(len(bins) - 1): + ret_hist.append( + HistogramBucket( + bucket=bins[i], + bucket_end=bins[i + 1], + ) + ) + return ret_hist + elif col.data_type == MetadataType.BOOLEAN: + return [ + HistogramBucket(bucket=True), + HistogramBucket(bucket=False), + ] + + elif col.data_type == MetadataType.DATETIME: + return [] + else: + return [] + + +async def histogram_buckets( project: str, req: list[ZenoColumn], num_bins: int | str = "doane" ) -> list[list[HistogramBucket]]: """Calculate the histogram buckets for a list of columns. 
@@ -30,133 +81,153 @@ def histogram_buckets( Returns: List[List[HistogramBucket]]: for each zeno column return a list of buckets """ - res: list[list[HistogramBucket]] = [] - for col in req: - col_list = column(project, col) - df_col: pd.Series = pd.DataFrame({"col": col_list})["col"] - if col.data_type == MetadataType.NOMINAL: - ret_hist: list[HistogramBucket] = [] - val_counts: pd.Series = df_col.value_counts() - for k in val_counts.keys(): # type: ignore - ret_hist.append(HistogramBucket(bucket=k)) - res.append(ret_hist) - elif col.data_type == MetadataType.CONTINUOUS: - ret_hist: list[HistogramBucket] = [] - df_col = pd.to_numeric(df_col).fillna(0) # type: ignore - bins = np.histogram_bin_edges(df_col, bins=num_bins) - for i in range(len(bins) - 1): - ret_hist.append( - HistogramBucket( - bucket=bins[i], - bucket_end=bins[i + 1], - ) - ) - res.append(ret_hist) - elif col.data_type == MetadataType.BOOLEAN: - res.append( - [ - HistogramBucket(bucket=True), - HistogramBucket(bucket=False), - ] - ) - elif col.data_type == MetadataType.DATETIME: - res.append([]) - else: - res.append([]) + tasks = [histogram_bucket(project, col, num_bins) for col in req] + res = await asyncio.gather(*tasks) return res -def histogram_counts(project: str, req: HistogramRequest) -> list[list[int]]: - """Calculate count for each bucket in each column histogram. +async def histogram_metric_task( + request: HistogramRequest, + col_request: HistogramColumnRequest, + bucket: HistogramBucket, + project_uuid: str, + filter_sql: sql.Composed | None, +) -> HistogramBucket: + """Calculate the metric for a single bucket. Args: - project (str): the project the user is currently working with. - req (HistogramRequest): specifying which histograms to calculate counts for. + request (HistogramRequest): the request object. + col_request (HistogramColumnRequest): the column request object. + bucket (HistogramBucket): the bucket to calculate the metric for. 
+ project_uuid (str): the project the user is currently working with. + filter_sql (sql.Composed): the filter to apply to the query. + Returns: - List[List[int]]: counts for the individual buckets of specified histograms. + HistogramBucket: the bucket with the metric added. """ - ret: list[list[int]] = [] - for r in req.column_requests: - col = r.column - data_frame = pd.DataFrame( - { - "col": column( - project, - col, - None - if req.filter_predicates is None - else filter_to_sql(req.filter_predicates, project), - ) - } - ) + filter_bucket = bucket_filter(col_request.column, bucket) + final_filter = filter_sql + if filter_bucket is not None: + if final_filter is None: + final_filter = filter_bucket + else: + final_filter = final_filter + sql.SQL(" AND ") + filter_bucket + metric = metric_map(request.metric, project_uuid, request.model, final_filter) + return HistogramBucket( + bucket=bucket.bucket, + bucket_end=bucket.bucket_end, + size=metric.size, + metric=metric.metric, + ) + + +async def histogram_count( + request: HistogramRequest, + col_request: HistogramColumnRequest, + project_uuid: str, + filter_sql: sql.Composed | None, + calculate_histograms: bool, +) -> list[HistogramBucket]: + """Calculate the counts and metrics for a column. + + Args: + request (HistogramRequest): the request object. + col_request (HistogramColumnRequest): the column request object. + project_uuid (str): the project the user is currently working with. + filter_sql (sql.Composed): the filter to apply to the query. + calculate_histograms (bool): whether to compute per-bucket metrics. + + + Returns: + List[HistogramBucket]: the buckets with the counts and metrics added. 
+ """ + col = col_request.column + + if request.model is None or request.metric is None or not calculate_histograms: + col = col_request.column + data_frame = pd.DataFrame({"col": column(project_uuid, col, filter_sql)}) if col.data_type == MetadataType.NOMINAL: - counts: pd.Series[int] = data_frame.groupby("col").size() # type: ignore - ret.append( - [ - counts[b.bucket] if b.bucket in counts else 0 # type: ignore - for b in r.buckets + if data_frame["col"].nunique() > 30: + return [] + else: + counts: pd.Series[int] = data_frame.groupby("col").size() + return [ + HistogramBucket( + bucket=b.bucket, + size=counts[b.bucket] # type: ignore + if b.bucket in counts + else 0, + ) + for b in col_request.buckets ] - ) - elif col.data_type == MetadataType.BOOLEAN: - ret.append( - [data_frame["col"].sum(), len(data_frame) - data_frame["col"].sum()] - ) + elif col.data_type == MetadataType.CONTINUOUS: - bucs = [b.bucket for b in r.buckets] - ret.append( + bucs = [b.bucket for b in col_request.buckets] + intervals = pd.IntervalIndex.from_arrays( + [float(b.bucket) for b in col_request.buckets], + [float(b.bucket_end) for b in col_request.buckets], # type: ignore + ) + counts = ( data_frame.groupby( - [pd.cut(data_frame["col"], bucs)], observed=False # type: ignore + [pd.cut(data_frame["col"], intervals)], + observed=False, # type: ignore ) .size() .astype(int) .tolist() ) + return [ + HistogramBucket( + bucket=b, + size=counts[i], + ) + for i, b in enumerate(bucs) + ] + elif col.data_type == MetadataType.BOOLEAN: + return [ + HistogramBucket( + bucket=True, + size=data_frame["col"].sum(), + ), + HistogramBucket( + bucket=False, + size=len(data_frame) - data_frame["col"].sum(), + ), + ] else: - ret.append([]) - return ret + return [] + else: + tasks = [ + histogram_metric_task(request, col_request, b, project_uuid, filter_sql) + for b in col_request.buckets + ] + return await asyncio.gather(*tasks) -def histogram_metrics(project: str, req: HistogramRequest) -> list[list[float 
| None]]: - """Calculate metric for each bucket in each column histogram. +async def histogram_counts( + project_uuid: str, req: HistogramRequest +) -> list[list[HistogramBucket]]: + """Calculate count and optionally metric for each bucket in each column histogram. Args: - project (str): the project the user is currently working with. - req (HistogramRequest): the histograms for which to calculate metrics. + project_uuid (str): the project the user is currently working with. + req (HistogramRequest): specifying which histograms to calculate counts for. Returns: - List[List[Union[float, None]]]: metrics for the requested histogram buckets. + List[List[HistogramBucket]]: per column, buckets with size and optional metric. """ - project_obj = project_from_uuid(project) - if ( - req.metric is None - or project_obj is not None - and project_obj.calculate_histogram_metrics is False - ): + project_obj = project_from_uuid(project_uuid) + if project_obj is None: return [] - - filter_sql = table_filter(project, req.model, req.filter_predicates, req.data_ids) - ret: list[list[float | None]] = [] - for r in req.column_requests: - loc_ret: list[float | None] = [] - index = 0 - for bucket in r.buckets: - if req.model: - filter_bucket = bucket_filter(r.column, bucket) - if index == 0: - index = 1 - final_filter = filter_sql - if filter_bucket is not None: - if final_filter is None: - final_filter = filter_bucket - else: - final_filter = final_filter + sql.SQL(" AND ") + filter_bucket - metric = metric_map(req.metric, project, req.model, final_filter) - if metric.metric is None: - loc_ret.append(None) - else: - loc_ret.append(metric.metric) - else: - loc_ret.append(None) - ret.append(loc_ret) - return ret + filter_sql = table_filter( + project_uuid, req.model, req.filter_predicates, req.data_ids + ) + tasks = [ + histogram_count( + req, r, project_uuid, filter_sql, project_obj.calculate_histogram_metrics + ) + for r in req.column_requests + ] + res = await asyncio.gather(*tasks) + 
return res diff --git a/backend/zeno_backend/processing/metrics/mean.py b/backend/zeno_backend/processing/metrics/mean.py index ade203d1..4ff8c694 100644 --- a/backend/zeno_backend/processing/metrics/mean.py +++ b/backend/zeno_backend/processing/metrics/mean.py @@ -1,7 +1,7 @@ """Mean metric calculation.""" from psycopg import sql -from zeno_backend.classes.base import GroupMetric +from zeno_backend.classes.base import GroupMetric, MetadataType from zeno_backend.classes.metric import Metric from zeno_backend.database.database import Database @@ -25,9 +25,9 @@ def mean( """ with Database() as db: # Get column name from project column map - column_id = db.execute_return( + column_output = db.execute_return( sql.SQL( - "SELECT column_id FROM {} WHERE name = {} AND" + "SELECT column_id, data_type FROM {} WHERE name = {} AND" " (model IS NULL OR model = {})" ).format( sql.Identifier(f"{project_uuid}_column_map"), @@ -36,20 +36,24 @@ def mean( ) ) - if len(column_id) == 0: + if len(column_output) == 0: return GroupMetric(metric=None, size=0) - column_id = column_id[0][0] + column_id = column_output[0][0] + if column_output[0][1] == MetadataType.BOOLEAN: + column_id = sql.Identifier(column_id) + sql.SQL("::int") + else: + column_id = sql.Identifier(column_id) if filter is None: db.execute( - sql.SQL("SELECT COUNT(*) AS n, AVG({}::float) FROM {}").format( - sql.Identifier(column_id), sql.Identifier(project_uuid) + sql.SQL("SELECT COUNT(*) AS n, AVG({}) FROM {}").format( + column_id, sql.Identifier(project_uuid) ) ) else: db.execute( - sql.SQL("SELECT COUNT(*) AS n, AVG({}::float) FROM {} WHERE ").format( - sql.Identifier(column_id), sql.Identifier(project_uuid) + sql.SQL("SELECT COUNT(*) AS n, AVG({}) FROM {} WHERE ").format( + column_id, sql.Identifier(project_uuid) ) + filter ) diff --git a/backend/zeno_backend/server.py b/backend/zeno_backend/server.py index a3770ce4..32979439 100644 --- a/backend/zeno_backend/server.py +++ b/backend/zeno_backend/server.py @@ -36,7 
+36,6 @@ HistogramRequest, histogram_buckets, histogram_counts, - histogram_metrics, ) from zeno_backend.processing.metrics.map import metric_map from zeno_backend.processing.slice_finder import slice_finder @@ -379,34 +378,24 @@ def get_metrics_for_slices(req: MetricRequest, project: str, request: Request): response_model=list[list[HistogramBucket]], tags=["zeno"], ) - def get_histogram_buckets(req: list[ZenoColumn], project: str, request: Request): - if not util.access_valid(project, request): - return Response(status_code=401) - return histogram_buckets(project, req) - - @api_app.post( - "/histogram-counts/{project}", - response_model=list[list[int]], - tags=["zeno"], - ) - def calculate_histogram_counts( - req: HistogramRequest, project: str, request: Request + async def get_histogram_buckets( + req: list[ZenoColumn], project: str, request: Request ): if not util.access_valid(project, request): return Response(status_code=401) - return histogram_counts(project, req) + return await histogram_buckets(project, req) @api_app.post( - "/histogram-metrics/{project}", - response_model=list[list[float | None]], + "/histogram-counts/{project}", + response_model=list[list[HistogramBucket]], tags=["zeno"], ) - def calculate_histogram_metrics( + async def calculate_histograms( req: HistogramRequest, project: str, request: Request ): if not util.access_valid(project, request): return Response(status_code=401) - return histogram_metrics(project, req) + return await histogram_counts(project, req) @api_app.get( "/project-users/{project}", diff --git a/frontend/src/lib/api/metadata.ts b/frontend/src/lib/api/metadata.ts index 37c0f448..9d368ec2 100644 --- a/frontend/src/lib/api/metadata.ts +++ b/frontend/src/lib/api/metadata.ts @@ -4,46 +4,19 @@ * can run then asynchronously and provide interactive updates while waiting * for more expensive computations like calculating metrics. 
*/ -import { columns, metricRange, project, requestingHistogramCounts } from '$lib/stores'; +import { metricRange, requestingHistogramCounts } from '$lib/stores'; import { getMetricRange } from '$lib/util/util'; import { CancelablePromise, ZenoColumnType, ZenoService, type FilterPredicateGroup, + type HistogramBucket, type Metric, type ZenoColumn } from '$lib/zenoapi'; import { get } from 'svelte/store'; -export interface HistogramEntry { - bucket: number | string | boolean; - bucketEnd?: number | string | boolean | null; - count?: number; - filteredCount?: number; - metric?: number; -} - -export async function loadHistogramData( - project_uuid: string, - tagIds: string[] | undefined, - selectionIds: string[] | undefined, - model: string | undefined, - columns: ZenoColumn[] -) { - const dataIds = - tagIds !== undefined && selectionIds !== undefined - ? [...new Set([...tagIds, ...selectionIds])] - : tagIds !== undefined - ? tagIds - : selectionIds; - const histograms = await getHistograms(project_uuid, columns, model); - const counts = await getHistogramCounts(project_uuid, columns, histograms, undefined, dataIds); - if (counts === undefined) { - return; - } - return counts; -} /** * Fetch metadata columns buckets for histograms. * @@ -53,25 +26,27 @@ export async function loadHistogramData( * @returns Histogram buckets for each column. 
*/ export async function getHistograms( - project_uuid: string, + project_uuid: string | undefined, completeColumns: ZenoColumn[], model: string | undefined -): Promise> { +): Promise> { + if (!project_uuid) { + return new Map(); + } const requestedHistograms = completeColumns.filter( (c) => (c.model === null || c.model === model) && c.columnType !== ZenoColumnType.DATA ); + requestingHistogramCounts.set(true); const res = await ZenoService.getHistogramBuckets(project_uuid, requestedHistograms); requestingHistogramCounts.set(false); - const histograms = new Map( - requestedHistograms.map((col, i) => [col.id, res[i]]) - ); - return histograms; + + return new Map(requestedHistograms.map((col, i) => [col.id, res[i]])); } // Since a user might change the selection before we get counts, // make this fetch request cancellable. -let histogramCountRequest: CancelablePromise>>; +let histogramRequest: CancelablePromise>>; /** * Fetch histogram counts for the buckets of metadata columns. * @@ -79,30 +54,41 @@ let histogramCountRequest: CancelablePromise>>; * @param filterPredicates Filter predicates to filter DataFrame by. * @returns Histogram counts for each column. */ -export async function getHistogramCounts( - project_uuid: string, +export async function calculateHistograms( + project_uuid: string | undefined, columns: ZenoColumn[], - histograms: Map, + histograms: Map, filterPredicates?: FilterPredicateGroup, - dataIds?: string[] -): Promise | undefined> { + dataIds?: string[], + model?: string | null, + metric?: Metric | null +): Promise> { + if (!project_uuid) { + return new Map(); + } const columnRequests = [...histograms.entries()].map(([k, v]) => ({ column: columns.find((col) => col.id === k) ?? 
columns[0], buckets: v })); - if (histogramCountRequest) { - histogramCountRequest.cancel(); + if (histogramRequest) { + histogramRequest.cancel(); } try { requestingHistogramCounts.set(true); - histogramCountRequest = ZenoService.calculateHistogramCounts(project_uuid, { + histogramRequest = ZenoService.calculateHistograms(project_uuid, { columnRequests, filterPredicates, + model, + metric, dataIds }); - const out = await histogramCountRequest; + const out = await histogramRequest; requestingHistogramCounts.set(false); + if (get(metricRange)[0] === Infinity) { + metricRange.set(getMetricRange(out)); + } + [...histograms.keys()].forEach((k, i) => { const hist = histograms.get(k); if (hist) { @@ -110,9 +96,10 @@ export async function getHistogramCounts( k, hist.map((h, j) => { if (filterPredicates === undefined) { - h.count = out[i][j]; + h.size = out[i][j].size; } - h.filteredCount = out[i][j]; + h.metric = out[i][j].metric; + h.filteredSize = out[i][j].size; return h; }) ); @@ -120,79 +107,7 @@ export async function getHistogramCounts( }); return histograms; } catch (e) { - return undefined; - } -} - -// Since a user might change the selection before we get metrics, -// make this fetch request cancellable. -let histogramMetricRequest: CancelablePromise>>; -/** - * Fetch histogram metrics for the buckets of metadata columns. - * - * @param histograms Histogram buckets for each column. - * @param filterPredicates Filter predicates to filter DataFrame by. - * @param model Model to fetch metrics for. - * @param metric Metric to calculate per bucket. - * @returns Histogram metrics for each column. 
- */ -export async function getHistogramMetrics( - histograms: Map, - model: string | undefined, - metric: Metric, - dataIds: string[] | undefined, - filterPredicates?: FilterPredicateGroup -): Promise | undefined> { - const config = get(project); - if (!config || !config.calculateHistogramMetrics) { - return undefined; - } - const columnRequests = [...histograms.entries()].map(([k, v]) => ({ - column: get(columns).find((col) => col.id === k) ?? get(columns)[0], - buckets: v - })); - if (histogramMetricRequest) { - histogramMetricRequest.cancel(); - } - try { - const config = get(project); - if (!config) { - return Promise.reject('No project selected.'); - } - histogramMetricRequest = ZenoService.calculateHistogramMetrics(config.uuid, { - columnRequests, - filterPredicates, - model: model ?? null, - metric, - dataIds - }); - - requestingHistogramCounts.set(true); - const res = await histogramMetricRequest; requestingHistogramCounts.set(false); - - if (res === undefined) { - return undefined; - } - - if (get(metricRange)[0] === Infinity) { - metricRange.set(getMetricRange(res)); - } - - [...histograms.keys()].forEach((k, i) => { - const hist = histograms.get(k); - if (hist !== undefined) { - histograms.set( - k, - hist.map((h, j) => { - h.metric = res[i][j] || 0; - return h; - }) - ); - } - }); return histograms; - } catch (e) { - return undefined; } } diff --git a/frontend/src/lib/components/chart/ChartHomeBlock.svelte b/frontend/src/lib/components/chart/ChartHomeBlock.svelte index e892da54..563e0648 100644 --- a/frontend/src/lib/components/chart/ChartHomeBlock.svelte +++ b/frontend/src/lib/components/chart/ChartHomeBlock.svelte @@ -70,7 +70,7 @@ type: chart.type, parameters: chart.parameters }).then((res) => { - invalidate('app:state'); + invalidate('app:chart'); charts.update((c) => { c.push({ id: res, @@ -93,8 +93,8 @@ e.stopPropagation(); showOptions = false; ZenoService.deleteChart(chart).then(() => { - invalidate('app:state'); - charts.update((c) => 
c.filter((c) => c.id != chart.id)); + charts.update((c) => c.filter((c) => c.id !== chart.id)); + invalidate('app:chart'); }); }} > diff --git a/frontend/src/lib/components/general/Header.svelte b/frontend/src/lib/components/general/Header.svelte index e80bf90c..055ac055 100644 --- a/frontend/src/lib/components/general/Header.svelte +++ b/frontend/src/lib/components/general/Header.svelte @@ -70,7 +70,7 @@ {/if}
- {#if currentTab?.includes('project') && $project && $project.ownerName === user?.name} + {#if (currentTab?.includes('explore') || currentTab?.includes('compare') || currentTab?.includes('chart')) && $project?.ownerName === user?.name} import { page } from '$app/stores'; - import { - getHistogramCounts, - getHistogramMetrics, - loadHistogramData, - type HistogramEntry - } from '$lib/api/metadata'; + import { calculateHistograms, getHistograms } from '$lib/api/metadata'; import MetadataCell from '$lib/components/metadata/cells/MetadataCell.svelte'; - import { - columns, - metric, - model, - project, - selectionIds, - selectionPredicates, - tagIds - } from '$lib/stores'; - import { Join, ZenoColumnType, type FilterPredicateGroup, type Metric } from '$lib/zenoapi'; + import { columns, metric, model, project, selectionIds, selectionPredicates } from '$lib/stores'; + import { ZenoColumnType, type HistogramBucket } from '$lib/zenoapi'; - let metadataHistograms: Map = new Map(); + let metadataHistograms: Map = new Map(); - if ($project) { - loadHistogramData($project?.uuid, undefined, undefined, $model, $columns).then((res) => { - if (res !== undefined) { - metadataHistograms = res; - } - }); - } - - function loadCountsAndMetrics( - tagIds: string[] | undefined, - selectionIds: string[] | undefined, - model: string | undefined, - metric: Metric | undefined, - selectionPredicates: FilterPredicateGroup | undefined - ) { - if ($project === undefined) return; - const dataIds = - tagIds !== undefined && selectionIds !== undefined - ? [...new Set([...tagIds, ...selectionIds])] - : tagIds !== undefined - ? tagIds - : selectionIds; - getHistogramCounts( - $project.uuid, + getHistograms($project?.uuid, $columns, $model).then((res) => { + calculateHistograms( + $project?.uuid, $columns, - metadataHistograms, - selectionPredicates === undefined - ? 
undefined - : { - predicates: [selectionPredicates], - join: Join.AND - }, - dataIds + res, + undefined, + $selectionIds, + $model, + $metric ).then((res) => { - if (res === undefined || model === undefined || metric === undefined) { - return; - } - metadataHistograms = res; - getHistogramMetrics( - res, - model, - metric, - dataIds, - selectionPredicates === undefined - ? undefined - : { - predicates: [selectionPredicates], - join: Join._ - } - ).then((res) => { - if (res === undefined) { - return; - } - metadataHistograms = res; - }); - }); - } - - // Calculate histogram metrics when metric changes - metric.subscribe((metric) => { - if (metadataHistograms.size === 0 || $model === undefined || metric === undefined) { - return; - } - - const dataIds = - $tagIds !== undefined && $selectionIds !== undefined - ? [...new Set([...$tagIds, ...$selectionIds])] - : $tagIds !== undefined - ? $tagIds - : $selectionIds; - - getHistogramMetrics(metadataHistograms, $model, metric, dataIds, undefined).then((res) => { - if (res === undefined) { - return; - } metadataHistograms = res; }); }); - // Calculate histogram counts when model changes for feature columns - model.subscribe((model) => { - if ($project) { - loadHistogramData($project.uuid, undefined, undefined, model, $columns).then((res) => { - if (res !== undefined) { - metadataHistograms = res; - } - }); - } + metric.subscribe((m) => { + calculateHistograms( + $project?.uuid, + $columns, + metadataHistograms, + $selectionPredicates, + $selectionIds, + $model, + m + ).then((r) => (metadataHistograms = r)); }); - // when the selection Ids change, update the histograms - selectionIds.subscribe((selectionIds) => { - if (metadataHistograms.size === 0) { - return; - } - loadCountsAndMetrics($tagIds, selectionIds, $model, $metric, $selectionPredicates); + model.subscribe((m) => { + getHistograms($project?.uuid, $columns, $model).then((res) => { + calculateHistograms( + $project?.uuid, + $columns, + res, + $selectionPredicates, + 
$selectionIds, + m, + $metric + ).then((r) => (metadataHistograms = r)); + }); }); - // when the tag Ids change, update the histograms - tagIds.subscribe((tIds) => { - if (metadataHistograms.size === 0) { - return; - } - loadCountsAndMetrics(tIds, $selectionIds, $model, $metric, $selectionPredicates); - }); + // when the selection Ids change, update the histograms + selectionIds.subscribe((selectionIds) => + calculateHistograms( + $project?.uuid, + $columns, + metadataHistograms, + $selectionPredicates, + selectionIds, + $model, + $metric + ).then((r) => (metadataHistograms = r)) + ); // Update counts and metrics when selection changes. selectionPredicates.subscribe((sels) => { if (metadataHistograms.size === 0) { return; } - loadCountsAndMetrics($tagIds, $selectionIds, $model, $metric, sels); + calculateHistograms( + $project?.uuid, + $columns, + metadataHistograms, + sels, + $selectionIds, + $model, + $metric + ).then((r) => (metadataHistograms = r)); }); diff --git a/frontend/src/lib/components/metadata/HistogramsHeader.svelte b/frontend/src/lib/components/metadata/HistogramsHeader.svelte index b286944f..9d47b67e 100644 --- a/frontend/src/lib/components/metadata/HistogramsHeader.svelte +++ b/frontend/src/lib/components/metadata/HistogramsHeader.svelte @@ -8,7 +8,7 @@

Metadata

diff --git a/frontend/src/lib/components/metadata/cells/MetadataCell.svelte b/frontend/src/lib/components/metadata/cells/MetadataCell.svelte index b6490d23..4704037f 100644 --- a/frontend/src/lib/components/metadata/cells/MetadataCell.svelte +++ b/frontend/src/lib/components/metadata/cells/MetadataCell.svelte @@ -1,11 +1,11 @@ {#if histogram} -
+
diff --git a/frontend/src/lib/components/metadata/cells/metadata-cells/BinaryMetadataCell.svelte b/frontend/src/lib/components/metadata/cells/metadata-cells/BinaryMetadataCell.svelte index c27c418e..15f2f48f 100644 --- a/frontend/src/lib/components/metadata/cells/metadata-cells/BinaryMetadataCell.svelte +++ b/frontend/src/lib/components/metadata/cells/metadata-cells/BinaryMetadataCell.svelte @@ -1,12 +1,17 @@