From 21ad23a70a804d6424319b61d9c741f89d4db9ae Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Wed, 31 Jul 2024 15:35:36 -0400 Subject: [PATCH] [batch] make batches query go brrrrrrr (#14629) This PR optimises the V2 batches query along with its associated subqueries. Listing batches for the CI user now executes in 1s, whereas previously it had taken ~14s. Resolves #14599 --------- Co-authored-by: Chris Llanwarne --- batch/batch/front_end/query/query.py | 37 +++++++++++----- batch/batch/front_end/query/query_v2.py | 58 ++++++++++++++----------- 2 files changed, 57 insertions(+), 38 deletions(-) diff --git a/batch/batch/front_end/query/query.py b/batch/batch/front_end/query/query.py index 5534eecf974..f1fdf78eec3 100644 --- a/batch/batch/front_end/query/query.py +++ b/batch/batch/front_end/query/query.py @@ -455,10 +455,14 @@ def __init__(self, term: str): self.term = term def query(self) -> Tuple[str, List[str]]: - sql = """ -((job_groups.batch_id, job_groups.job_group_id) IN - (SELECT batch_id, job_group_id FROM job_group_attributes - WHERE `key` = %s OR `value` = %s)) + sql = """\ +EXISTS ( + SELECT NULL + FROM job_group_attributes AS attrs + WHERE attrs.batch_id = job_groups.batch_id + AND attrs.job_group_id = job_groups.job_group_id + AND (attrs.`key` = %s OR attrs.`value` = %s) +) """ return (sql, [self.term, self.term]) @@ -478,10 +482,14 @@ def __init__(self, term: str): self.term = term def query(self) -> Tuple[str, List[str]]: - sql = """ -((job_groups.batch_id, job_groups.job_group_id) IN - (SELECT batch_id, job_group_id FROM job_group_attributes - WHERE `key` LIKE %s OR `value` LIKE %s)) + sql = """\ +EXISTS ( + SELECT NULL + FROM job_group_attributes AS attrs + WHERE attrs.batch_id = job_groups.batch_id + AND attrs.job_group_id = job_groups.job_group_id + AND (attrs.`key` LIKE %s OR attrs.`value` LIKE %s) +) """ escaped_term = f'%{self.term}%' return (sql, [escaped_term, escaped_term]) @@ -505,10 +513,15 @@ def query(self) -> Tuple[str, List[str]]: value 
= self.value if isinstance(self.operator, PartialMatchOperator): value = f'%{value}%' - sql = f""" -((job_groups.batch_id, job_groups.job_group_id) IN - (SELECT batch_id, job_group_id FROM job_group_attributes - WHERE `key` = %s AND `value` {op} %s)) + sql = f"""\ +EXISTS ( + SELECT NULL + FROM job_group_attributes AS attrs + WHERE attrs.batch_id = job_groups.batch_id + AND attrs.job_group_id = job_groups.job_group_id + AND attrs.`key` = %s + AND attrs.`value` {op} %s +) """ return (sql, [self.key, value]) diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py index 6e82a00c50f..67e5b2ffc94 100644 --- a/batch/batch/front_end/query/query_v2.py +++ b/batch/batch/front_end/query/query_v2.py @@ -125,40 +125,46 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int]) where_conditions.append(f'({cond})') where_args += args - sql = f""" -SELECT batches.*, - cancelled_t.cancelled IS NOT NULL AS cancelled, + sql = f"""\ +SELECT + batches.*, + cancelled_t.cancelled <=> 1 AS cancelled, job_groups_n_jobs_in_complete_states.n_completed, job_groups_n_jobs_in_complete_states.n_succeeded, job_groups_n_jobs_in_complete_states.n_failed, job_groups_n_jobs_in_complete_states.n_cancelled, - cost_t.cost, cost_t.cost_breakdown -FROM job_groups -LEFT JOIN batches ON batches.id = job_groups.batch_id -LEFT JOIN billing_projects ON batches.billing_project = billing_projects.name -LEFT JOIN job_groups_n_jobs_in_complete_states ON job_groups.batch_id = job_groups_n_jobs_in_complete_states.id AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id -LEFT JOIN LATERAL ( - SELECT 1 AS cancelled - FROM job_group_self_and_ancestors - INNER JOIN job_groups_cancelled - ON job_group_self_and_ancestors.batch_id = job_groups_cancelled.id AND - job_group_self_and_ancestors.ancestor_id = job_groups_cancelled.job_group_id - WHERE job_groups.batch_id = job_group_self_and_ancestors.batch_id AND - job_groups.job_group_id 
= job_group_self_and_ancestors.job_group_id -) AS cancelled_t ON TRUE -STRAIGHT_JOIN billing_project_users ON batches.billing_project = billing_project_users.billing_project -LEFT JOIN LATERAL ( - SELECT COALESCE(SUM(`usage` * rate), 0) AS cost, JSON_OBJECTAGG(resources.resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown - FROM ( - SELECT resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + cost_t.cost, + cost_t.cost_breakdown +FROM batches +INNER JOIN job_groups + ON batches.id = job_groups.batch_id +INNER JOIN billing_projects + ON batches.billing_project = billing_projects.name +INNER JOIN billing_project_users + ON batches.billing_project = billing_project_users.billing_project +INNER JOIN job_groups_n_jobs_in_complete_states + ON batches.id = job_groups_n_jobs_in_complete_states.id + AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id +LEFT JOIN (SELECT *, 1 AS cancelled FROM job_groups_cancelled) AS cancelled_t + ON batches.id = cancelled_t.id + AND job_groups.job_group_id = cancelled_t.job_group_id +INNER JOIN LATERAL ( + WITH resource_costs AS ( + SELECT + resource_id, + CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` FROM aggregated_job_group_resources_v3 - WHERE job_groups.batch_id = aggregated_job_group_resources_v3.batch_id AND job_groups.job_group_id = aggregated_job_group_resources_v3.job_group_id + WHERE batch_id = batches.id GROUP BY resource_id - ) AS usage_t - LEFT JOIN resources ON usage_t.resource_id = resources.resource_id + ) + SELECT + COALESCE(SUM(`usage` * rate), 0) AS cost, + JSON_OBJECTAGG(resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown + FROM resource_costs + INNER JOIN resources USING (resource_id) ) AS cost_t ON TRUE WHERE {' AND '.join(where_conditions)} -ORDER BY job_groups.batch_id DESC +ORDER BY batches.id DESC LIMIT 51; """