Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve batch loading of UUIDs stats #154

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion app_sidebar_collapsible.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import dash
import dash_bootstrap_components as dbc
from dash import Input, Output, dcc, html, Dash
from dash import Input, Output, dcc, html, Dash, DiskcacheManager
import dash_auth
import logging
import base64
Expand Down Expand Up @@ -42,10 +42,17 @@
'hello': 'world'
}

# diskcache manager; required for 'background' callbacks
# https: // dash.plotly.com/background-callbacks
import diskcache
cache = diskcache.Cache("./cache")
background_callback_manager = DiskcacheManager(cache)

app = Dash(
external_stylesheets=[dbc.themes.BOOTSTRAP, dbc.icons.FONT_AWESOME],
suppress_callback_exceptions=True,
use_pages=True,
background_callback_manager=background_callback_manager
)
server = app.server # expose server variable for Procfile

Expand Down
141 changes: 57 additions & 84 deletions pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback.
The workaround is to check if the input value is None.
"""
from dash import dcc, html, Input, Output, callback, register_page, dash_table, State
from dash import dcc, html, Input, Output, callback, register_page, dash_table, State, set_props
# Etc
import logging
import pandas as pd
Expand Down Expand Up @@ -31,9 +31,8 @@
]),
html.Div(id='tabs-content'),
dcc.Store(id='selected-tab', data='tab-uuids-datatable'), # Store to hold selected tab
dcc.Interval(id='interval-load-more', interval=24000, n_intervals=0), # Interval for loading more data
dcc.Store(id='store-uuids', data=[]), # Store to hold the original UUIDs data
dcc.Store(id='store-loaded-uuids', data={'data': [], 'loaded': False}), # Store to track loaded data
dcc.Store(id='loaded-uuids-stats', data=[]),
dcc.Store(id='all-uuids-stats-loaded', data=False),
dcc.Store(id='uuids-page-current', data=0), # Store to track current page for UUIDs DataTable
# RadioItems for key list switch, wrapped in a div that can hide/show
html.Div(
Expand Down Expand Up @@ -143,10 +142,37 @@ def update_uuids_page_current(page_current, selected_tab):
raise PreventUpdate


@callback(
Output('all-uuids-stats-loaded', 'data'),
Input('tabs-datatable', 'value'),
Input('store-uuids', 'data'),
background=True,
# hide the global spinner while callback is running
running=[Output('global-loading', 'display'), 'hide', 'auto'],
# if page changes or tab changes while callback is running, cancel
cancel=[
Input('url', 'pathname'),
Input('tabs-datatable', 'value')
],
)
def load_uuids_stats(tab, uuids):
logging.debug("loading uuids stats for tab %s" % tab)
if tab != 'tab-uuids-datatable':
return

# slice uuids into chunks of 10
uuids_chunks = [uuids['data'][i:i+10] for i in range(0, len(uuids['data']), 10)]
loaded_stats = []
for uuids in uuids_chunks:
processed_uuids = db_utils.add_user_stats(uuids, 10)
loaded_stats.extend(processed_uuids)
logging.debug("loaded %s uuids stats: %s" % (len(loaded_stats), loaded_stats))
set_props('loaded-uuids-stats', {'data': loaded_stats})
return True


@callback(
Output('tabs-content', 'children'),
Output('store-loaded-uuids', 'data'),
Output('interval-load-more', 'disabled'), # Disable interval when all data is loaded
Input('tabs-datatable', 'value'),
Input('store-uuids', 'data'),
Input('store-excluded-uuids', 'data'),
Expand All @@ -156,13 +182,11 @@ def update_uuids_page_current(page_current, selected_tab):
Input('date-picker', 'start_date'),
Input('date-picker', 'end_date'),
Input('date-picker-timezone', 'value'),
Input('interval-load-more', 'n_intervals'), # Interval to trigger the loading of more data
Input('keylist-switch', 'value'), # Add keylist-switch to trigger data refresh on change
Input('uuids-page-current', 'data'), # Current page number for UUIDs DataTable
State('store-loaded-uuids', 'data'), # Use State to track already loaded data
State('store-loaded-uuids', 'loaded') # Keep track if we have finished loading all data
Input('loaded-uuids-stats', 'data'),
)
def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone, n_intervals, key_list, current_page, loaded_uuids_store, all_data_loaded):
def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone, key_list, current_page, loaded_uuids):
with ect.Timer() as total_timer:
initial_batch_size = 10 # Define the batch size for loading UUIDs

Expand All @@ -172,74 +196,35 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de

# Initialize return variables
content = None
updated_loaded_uuids_store = loaded_uuids_store.copy() if loaded_uuids_store else {'data': [], 'loaded': False}
interval_disabled = True

# Handle the UUIDs tab without fullscreen loading spinner
if tab == 'tab-uuids-datatable':
with ect.Timer() as handle_uuids_timer:
logging.debug(f"Callback - {selected_tab} Stage 2: Handling UUIDs tab.")
# Prepare the data to be displayed
columns = perm_utils.get_uuids_columns() # Get the relevant columns
df = pd.DataFrame(loaded_uuids)

# Ensure store_uuids contains the key 'data' which is a list of dictionaries
if not isinstance(store_uuids, dict) or 'data' not in store_uuids:
logging.error(f"Expected store_uuids to be a dict with a 'data' key, but got {type(store_uuids)}")
content = html.Div([html.P("Data structure error.")])
interval_disabled = True
if not perm_utils.has_permission('data_uuids'):
logging.debug(f"Callback - {selected_tab} insufficient permission.")
content = html.Div([html.P("No data available or you don't have permission.")])
else:
uuids_list = store_uuids['data']

# Ensure uuids_list is a list for slicing
if not isinstance(uuids_list, list):
logging.error(f"Expected store_uuids['data'] to be a list but got {type(uuids_list)}")
content = html.Div([html.P("Data structure error.")])
interval_disabled = True
if df.empty and len(store_uuids['data']) > 0:
logging.debug(f"Callback - {selected_tab} loaded_uuids is empty.")
content = html.Div(
[dcc.Loading(id='uuids-loading', display='show')],
style={'margin-top': '36px'}
)
else:
loaded_data = updated_loaded_uuids_store.get('data', [])
total_loaded = len(loaded_data)

# Handle lazy loading
if not updated_loaded_uuids_store.get('loaded', False):
total_to_load = total_loaded + initial_batch_size
total_to_load = min(total_to_load, len(uuids_list)) # Avoid loading more than available

logging.debug(f"Callback - {selected_tab} Stage 3: Loading next batch of UUIDs from {total_loaded} to {total_to_load}.")

new_data = uuids_list[total_loaded:total_to_load]

if new_data:
# Process and append the new data to the loaded store
processed_data = db_utils.add_user_stats(new_data, initial_batch_size)
loaded_data.extend(processed_data)

# Update the store with the new data by creating a new dict
updated_loaded_uuids_store = {
'data': loaded_data,
'loaded': len(loaded_data) >= len(uuids_list)
}

logging.debug(f"Callback - {selected_tab} Stage 4: New batch loaded. Total loaded: {len(loaded_data)}.")

# Prepare the data to be displayed
columns = perm_utils.get_uuids_columns() # Get the relevant columns
df = pd.DataFrame(updated_loaded_uuids_store['data'])

if df.empty or not perm_utils.has_permission('data_uuids'):
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = html.Div([html.P("No data available or you don't have permission.")])
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])

logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.")
content = html.Div([
populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page
html.P(
f"Showing {len(updated_loaded_uuids_store['data'])} of {len(uuids_list)} UUIDs." +
(f" Loading {initial_batch_size} more..." if not updated_loaded_uuids_store.get('loaded', False) else ""),
style={'margin': '15px 5px'}
)
])
interval_disabled = updated_loaded_uuids_store.get('loaded', False)
df = df.drop(columns=[col for col in df.columns if col not in columns])
logging.debug(f"Callback - {selected_tab} Stage 5: Returning appended data to update the UI.")
content = html.Div([
populate_datatable(df, table_id='uuid-table', page_current=current_page), # Pass current_page
html.P(
f"Showing {len(loaded_uuids)} of {len(store_uuids['data'])} UUIDs." +
(f" Loading {initial_batch_size} more..." if len(loaded_uuids) < len(store_uuids['data']) else ""),
style={'margin': '15px 5px'}
)
])

# Store timing after handling UUIDs tab
esdsq.store_dashboard_time(
Expand All @@ -262,7 +247,6 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
if df.empty or not has_perm:
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = None
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])
df = clean_location_data(df)
Expand All @@ -273,7 +257,6 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
html.Button('Display columns with raw units', id='button-clicked', n_clicks=0, style={'marginLeft': '5px'}),
trips_table
])
interval_disabled = True

# Store timing after handling Trips tab
esdsq.store_dashboard_time(
Expand All @@ -294,25 +277,20 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
df = pd.DataFrame(data)
if df.empty:
content = None
interval_disabled = True
else:
content = populate_datatable(df)
interval_disabled = True
elif len(data) > 1:
if not has_perm:
content = None
interval_disabled = True
else:
content = html.Div([
dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[
dcc.Tab(label=key, value=key) for key in data
]),
html.Div(id='subtabs-demographics-content')
])
interval_disabled = True
else:
content = None
interval_disabled = True

# Store timing after handling Demographics tab
esdsq.store_dashboard_time(
Expand All @@ -338,17 +316,14 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
if df.empty or not has_perm:
logging.debug(f"Callback - {selected_tab} Error Stage: No data available or permission issues.")
content = None
interval_disabled = True
else:
df = df.drop(columns=[col for col in df.columns if col not in columns])

datatable = populate_datatable(df)

content = datatable
interval_disabled = True
else:
content = None
interval_disabled = True

# Store timing after handling Trajectories tab
esdsq.store_dashboard_time(
Expand All @@ -360,16 +335,14 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
else:
logging.debug(f"Callback - {selected_tab} Error Stage: No data loaded or unhandled tab.")
content = None
interval_disabled = True

# Store total timing after all stages
esdsq.store_dashboard_time(
"admin/data/render_content/total_time",
total_timer
)

return content, updated_loaded_uuids_store, interval_disabled

return content

# Handle subtabs for demographic table when there are multiple surveys
@callback(
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,8 @@ flask-talisman==1.0.0
dash_auth==2.0.0
arrow==1.3.0
dash-leaflet==1.0.7
# dash[diskcache] dependencies
# from https://github.com/plotly/dash/blob/dev/requirements/diskcache.txt
diskcache>=5.2.1
multiprocess>=0.70.12
psutil>=5.8.0