Skip to content

Commit

Permalink
fix: use sane sleep on primary coordinator main loop
Browse files Browse the repository at this point in the history
The primary coordinator main loop must not sleep for zero seconds
when a heartbeat has been missed. In that case, sleep for half of
the heartbeat interval instead.
  • Loading branch information
jjaakola-aiven committed Jan 16, 2023
1 parent 6a0d7d1 commit 9063047
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion karapace/master_coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ def run(self) -> None:
try:
if not self.kafka_client:
if self.init_kafka_client() is False:
# If the Kafka client could not be initialized, sleep a bit before retrying
time.sleep(0.5)
continue
if not self.sc:
self.init_schema_coordinator()
Expand All @@ -236,7 +238,13 @@ def run(self) -> None:
self.sc.ensure_active_group()
self.sc.poll_heartbeat()
LOG.debug("We're master: %r: master_uri: %r", self.sc.are_we_master, self.sc.master_url)
time.sleep(min(_hb_interval, self.sc.time_to_next_heartbeat()))
# When a heartbeat has been missed, `time_to_next_heartbeat` returns 0, so the
# computed sleep time would be 0. In that case use half of the heartbeat interval
# as a sane sleep time instead of running the loop without sleep for a while.
sleep_time = min(_hb_interval, self.sc.time_to_next_heartbeat())
if not sleep_time:
sleep_time = _hb_interval / 2
time.sleep(sleep_time)
except: # pylint: disable=bare-except
LOG.exception("Exception in master_coordinator")
time.sleep(1.0)
Expand Down

0 comments on commit 9063047

Please sign in to comment.