Skip to content

Commit

Permalink
improve batch loading of UUIDs stats
Browse files Browse the repository at this point in the history
https://dash.plotly.com/background-callbacks#using-set_props-within-a-callback

Implements a 'background' callback to improve the batching of add_user_stats. This avoids relying on a dcc.Interval with a fixed time between batches

The new callback, which has background=True, continues running continuously on the server until all of the chunks have been processed. At the end of each chunk it calls set_props, which sends an update to the UI with the latest data for 'store-loaded-uuids'

Via `running=`, the loading spinner is disabled during execution of the callback and reinstated when the callback finishes or cancels.
When there are 0 loaded-uuids-stats and >0 total uuids (i.e. while we are loading the first batch of 10) a secondary loading spinner is shown in place of the table

This should speed things up further and prevent any glitches caused by the fixed interval

The Output of the callback is 'all-uuids-stats-loaded', a new store which is not used anywhere. This seems pointless, but is necessary because the callback's initialization depends on its Output. if it has no Output, it initializes on the Overview page causing errors. If the output is 'loaded-uuids-stats', it waits for the callback to finish before showing anything, defeating the purpose of batch loading. Thus, I resorted to making a new store for it
  • Loading branch information
JGreenlee committed Dec 19, 2024
1 parent d372913 commit 29a4a08
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 84 deletions.
9 changes: 8 additions & 1 deletion app_sidebar_collapsible.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import dash
import dash_bootstrap_components as dbc
from dash import Input, Output, dcc, html, Dash
from dash import Input, Output, dcc, html, Dash, DiskcacheManager
import dash_auth
import logging
import base64
Expand Down Expand Up @@ -42,10 +42,17 @@
'hello': 'world'
}

# diskcache manager; required for 'background' callbacks
# https: // dash.plotly.com/background-callbacks
import diskcache
cache = diskcache.Cache("./cache")
background_callback_manager = DiskcacheManager(cache)

app = Dash(
external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME],
suppress_callback_exceptions=True,
use_pages=True,
background_callback_manager=background_callback_manager
)
server = app.server # expose server variable for Procfile

Expand Down
140 changes: 57 additions & 83 deletions pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback.
The workaround is to check if the input value is None.
"""
from dash import dcc, html, Input, Output, callback, register_page, dash_table, State
from dash import dcc, html, Input, Output, callback, register_page, dash_table, State, set_props
# Etc
import logging
import pandas as pd
Expand Down Expand Up @@ -31,9 +31,9 @@
]),
html.Div(id='tabs-content'),
dcc.Store(id='selected-tab', data='tab-uuids-datatable'), # Store to hold selected tab
dcc.Interval(id='interval-load-more', interval=24000, n_intervals=0), # Interval for loading more data
dcc.Store(id='store-uuids', data=[]), # Store to hold the original UUIDs data
dcc.Store(id='store-loaded-uuids', data={'data': [], 'loaded': False}), # Store to track loaded data
dcc.Store(id='loaded-uuids-stats', data=[]),
dcc.Store(id='all-uuids-stats-loaded', data=False),
dcc.Store(id='uuids-page-current', data=0), # Store to track current page for UUIDs DataTable
# RadioItems for key list switch, wrapped in a div that can hide/show
html.Div(
Expand Down Expand Up @@ -143,10 +143,37 @@ def update_uuids_page_current(page_current, selected_tab):
raise PreventUpdate


@callback(
Output('all-uuids-stats-loaded', 'data'),
Input('tabs-datatable', 'value'),
Input('store-uuids', 'data'),
background=True,
# hide the global spinner while callback is running
running=[Output('global-loading', 'display'), 'hide', 'auto'],
# if page changes or tab changes while callback is running, cancel
cancel=[
Input('url', 'pathname'),
Input('tabs-datatable', 'value')
],
)
def load_uuids_stats(tab, uuids):
logging.debug("loading uuids stats for tab %s" % tab)
if tab != 'tab-uuids-datatable':
return

# slice uuids into chunks of 10
uuids_chunks = [uuids['data'][i:i+10] for i in range(0, len(uuids['data']), 10)]
loaded_stats = []
for uuids in uuids_chunks:
processed_uuids = db_utils.add_user_stats(uuids, 10)
loaded_stats.extend(processed_uuids)
logging.debug("loaded %s uuids stats: %s" % (len(loaded_stats), loaded_stats))
set_props('loaded-uuids-stats', {'data': loaded_stats})
return True


@callback(
Output('tabs-content', 'children'),
Output('store-loaded-uuids', 'data'),
Output('interval-load-more', 'disabled'), # Disable interval when all data is loaded
Input('tabs-datatable', 'value'),
Input('store-uuids', 'data'),
Input('store-excluded-uuids', 'data'),
Expand All @@ -156,13 +183,11 @@ def update_uuids_page_current(page_current, selected_tab):
Input('date-picker', 'start_date'),
Input('date-picker', 'end_date'),
Input('date-picker-timezone', 'value'),
Input('interval-load-more', 'n_intervals'), # Interval to trigger the loading of more data
Input('keylist-switch', 'value'), # Add keylist-switch to trigger data refresh on change
Input('uuids-page-current', 'data'), # Current page number for UUIDs DataTable
State('store-loaded-uuids', 'data'), # Use State to track already loaded data
State('store-loaded-uuids', 'loaded') # Keep track if we have finished loading all data
Input('loaded-uuids-stats', 'data'),
)
def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone, n_intervals, key_list, current_page, loaded_uuids_store, all_data_loaded):
def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone, key_list, current_page, loaded_uuids):
with ect.Timer() as total_timer:
initial_batch_size = 10 # Define the batch size for loading UUIDs

Expand All @@ -172,74 +197,35 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de

# Initialize return variables
content = None
updated_loaded_uuids_store = loaded_uuids_store.copy() if loaded_uuids_store else {'data': [], 'loaded': False}
interval_disabled = True

# Handle the UUIDs tab without fullscreen loading spinner
if tab == 'tab-uuids-datatable':
with ect.Timer() as handle_uuids_timer:
logging.debug(f"Callback - {selected_tab} Stage 2: Handling UUIDs tab.")
# Prepare the data to be displayed
columns = perm_utils.get_uuids_columns() # Get the relevant columns
df = pd.DataFrame(loaded_uuids)

# Ensure store_uuids contains the key 'data' which is a list of dictionaries
if not isinstance(store_uuids, dict) or 'data' not in store_uuids:
logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}")
content = html.Div([html.P("Data structure error.")])
interval_disabled = True
if not perm_utils.has_permission('data_uuids'):
logging.debug(f"Callback - {selected_tab} insufficient permission.")
content = html.Div([html.P("No data available or you don't have permission.")])
else:
uuids_list = store_uuids['data']

# Ensure uuids_list is a list for slicing
if not isinstance(uuids_list, list):
logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}")
content = html.Div([html.P("Data structure error.")])
interval_disabled = True
if df.empty and len(store_uuids['data']) > 0:
logging.debug(f"Callback - {selected_tab} loaded_uuids is empty.")
content = html.Div(
[dcc.Loading(id='uuids-loading', display='show')],
style={'margin-top': '36px'}
)
else:
loaded_data = updated_loaded_uuids_store.get('data', [])
total_loaded = len(loaded_data)

# Handle lazy loading
if not updated_loaded_uuids_store.get('loaded', False):
total_to_load = total_loaded + initial_batch_size
total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available

logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.")

new_data = uuids_list[total_loaded:total_to_load]

if new_data:
# Process and append the new data to the loaded store
processed_data = db_utils.add_user_stats(new_data, initial_batch_size)
loaded_data.extend(processed_data)

# Update the store with the new data by creating a new dict
updated_loaded_uuids_store = {
'data': loaded_data,
'loaded': len(loaded_data) >= len(uuids_list)
}

logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.")

# Prepare the data to be displayed
columns = perm_utils.get_uuids_columns() # Get the relevant columns
df = pd.DataFrame(updated_loaded_uuids_store['data'])

if df.empty or not perm_utils.has_permission('data_uuids'):
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = html.Div([html.P("No data available or you don't have permission.")])
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])

logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.")
content = html.Div([
populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page
html.P(
f"Showing {len(updated_loaded_uuids_store['data'])} of {len(uuids_list)} UUIDs." +
(f" Loading {initial_batch_size} more..." if not updated_loaded_uuids_store.get('loaded', False) else ""),
style={'margin': '15px 5px'}
)
])
interval_disabled = updated_loaded_uuids_store.get('loaded', False)
df = df.drop(columns=[col for col in df.columns if col not in columns])
logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.")
content = html.Div([
populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page
html.P(
f"Showing {len(loaded_uuids)} of {len(store_uuids['data'])} UUIDs." +
(f" Loading {initial_batch_size} more..." if len(loaded_uuids) < len(store_uuids['data']) else ""),
style={'margin': '15px 5px'}
)
])

# Store timing after handling UUIDs tab
esdsq.store_dashboard_time(
Expand All @@ -262,7 +248,6 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
if df.empty or not has_perm:
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = None
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])
df = clean_location_data(df)
Expand All @@ -273,7 +258,6 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}),
trips_table
])
interval_disabled = True

# Store timing after handling Trips tab
esdsq.store_dashboard_time(
Expand All @@ -294,25 +278,20 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
df = pd.DataFrame(data)
if df.empty:
content = None
interval_disabled = True
else:
content = populate_datatable(df)
interval_disabled = True
elif len(data) > 1:
if not has_perm:
content = None
interval_disabled = True
else:
content = html.Div([
dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[
dcc.Tab(label=key, value=key) for key in data
]),
html.Div(id='subtabs-demographics-content')
])
interval_disabled = True
else:
content = None
interval_disabled = True

# Store timing after handling Demographics tab
esdsq.store_dashboard_time(
Expand All @@ -338,17 +317,14 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
if df.empty or not has_perm:
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = None
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])

datatable = populate_datatable(df)

content = datatable
interval_disabled = True
else:
content = None
interval_disabled = True

# Store timing after handling Trajectories tab
esdsq.store_dashboard_time(
Expand All @@ -360,16 +336,14 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
else:
logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.")
content = None
interval_disabled = True

# Store total timing after all stages
esdsq.store_dashboard_time(
"admin/data/render_content/total_time",
total_timer
)

return content, updated_loaded_uuids_store, interval_disabled

return content

# Handle subtabs for demographic table when there are multiple surveys
@callback(
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,8 @@ flask-talisman==1.0.0
dash_auth==2.0.0
arrow==1.3.0
dash-leaflet==1.0.7
# dash[diskcache] dependencies
# from https://github.com/plotly/dash/blob/dev/requirements/diskcache.txt
diskcache>=5.2.1
multiprocess>=0.70.12
psutil>=5.8.0

0 comments on commit 29a4a08

Please sign in to comment.