From f1b6be0a4c08fe51d235b1bcfecc409185bed35c Mon Sep 17 00:00:00 2001 From: Ezra Odio Date: Tue, 30 Jul 2024 19:01:19 +0000 Subject: [PATCH 1/2] Add scheduled queries with no latest_query_data to outdated_queries --- redash/models/__init__.py | 22 +++++++++++++++------- tests/test_models.py | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index ce3446c916..172c6aeabb 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -389,6 +389,8 @@ def groups(self): def should_schedule_next(previous_iteration, now, interval, time=None, day_of_week=None, failures=0): # if time exists then interval > 23 hours (82800s) # if day_of_week exists then interval > 6 days (518400s) + if previous_iteration is None: + return False if time is None: ttl = int(interval) next_iteration = previous_iteration + datetime.timedelta(seconds=ttl) @@ -608,17 +610,23 @@ def outdated_queries(cls): if schedule_until <= now: continue + if all(value is None for value in query.schedule.values()): + continue + retrieved_at = scheduled_queries_executions.get(query.id) or ( query.latest_query_data and query.latest_query_data.retrieved_at ) - if should_schedule_next( - retrieved_at or now, - now, - query.schedule["interval"], - query.schedule["time"], - query.schedule["day_of_week"], - query.schedule_failures, + if ( + should_schedule_next( + retrieved_at, + now, + query.schedule["interval"], + query.schedule["time"], + query.schedule["day_of_week"], + query.schedule_failures, + ) + or not retrieved_at ): key = "{}:{}".format(query.query_hash, query.data_source_id) outdated_queries[key] = query diff --git a/tests/test_models.py b/tests/test_models.py index f7d563aaae..97fcfddfd6 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -216,6 +216,20 @@ def test_enqueues_query_only_once(self): self.assertEqual(list(models.Query.outdated_queries()), [query2]) + def test_enqueues_scheduled_query_without_latest_query_data(self): + """ + Queries with a schedule but no latest_query_data will still be reported by Query.outdated_queries() + """ + query = self.factory.create_query( + schedule=self.schedule(interval="60"), + data_source=self.factory.create_data_source(), + ) + + outdated_queries = models.Query.outdated_queries() + self.assertEqual(query.latest_query_data, None) + self.assertEqual(len(outdated_queries), 1) + self.assertIn(query, outdated_queries) + def test_enqueues_query_with_correct_data_source(self): """ Queries from different data sources will be reported by From 49fa2b1d6810d3fef96a59d2ce3f315d8895b207 Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Sun, 27 Oct 2024 15:05:49 +0200 Subject: [PATCH 2/2] Apply code review comments --- redash/models/__init__.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index 172c6aeabb..f3483bfa3a 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -387,10 +387,12 @@ def groups(self): def should_schedule_next(previous_iteration, now, interval, time=None, day_of_week=None, failures=0): + # if previous_iteration is None, it means the query has never been run before + # so we should schedule it immediately + if previous_iteration is None: + return True # if time exists then interval > 23 hours (82800s) # if day_of_week exists then interval > 6 days (518400s) - if previous_iteration is None: - return False if time is None: ttl = int(interval) next_iteration = previous_iteration + datetime.timedelta(seconds=ttl) @@ -604,29 +606,28 @@ def outdated_queries(cls): if query.schedule.get("disabled"): continue + # Skip queries that have None for all schedule values. It's unclear whether this + # something that can happen in practice, but we have a test case for it. + if all(value is None for value in query.schedule.values()): + continue + if query.schedule["until"]: schedule_until = pytz.utc.localize(datetime.datetime.strptime(query.schedule["until"], "%Y-%m-%d")) if schedule_until <= now: continue - if all(value is None for value in query.schedule.values()): - continue - retrieved_at = scheduled_queries_executions.get(query.id) or ( query.latest_query_data and query.latest_query_data.retrieved_at ) - if ( - should_schedule_next( - retrieved_at, - now, - query.schedule["interval"], - query.schedule["time"], - query.schedule["day_of_week"], - query.schedule_failures, - ) - or not retrieved_at + if should_schedule_next( + retrieved_at, + now, + query.schedule["interval"], + query.schedule["time"], + query.schedule["day_of_week"], + query.schedule_failures, ): key = "{}:{}".format(query.query_hash, query.data_source_id) outdated_queries[key] = query