From 00b812bbd4215d5ae052c8780622971c85c42410 Mon Sep 17 00:00:00 2001 From: Matyas Selmeci Date: Mon, 20 May 2024 13:48:55 -0500 Subject: [PATCH] Cache and measure the update of the Topology Git repo separately instead of having the vo data, projects data, mappings data, and resourcegroup data all pulling from the git repo when they get updated. While I'm at it, use time.monotonic() instead of time.time() for the cache time (which avoids issues if we run, say, ntpdate). --- src/webapp/common.py | 9 +++++++++ src/webapp/models.py | 36 +++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/webapp/common.py b/src/webapp/common.py index d11c8e2328..c64a406438 100644 --- a/src/webapp/common.py +++ b/src/webapp/common.py @@ -6,6 +6,7 @@ import re import subprocess import sys +import time from typing import Any, Dict, List, Union, AnyStr, NewType, TypeVar from functools import wraps @@ -394,6 +395,14 @@ def wrapped(): return wrapped +def get_timestamp(): + """Return a monotonic timestamp if available, otherwise a wall-clock timestamp.""" + if hasattr(time, "monotonic"): + return time.monotonic() + else: + return time.time() + + XROOTD_CACHE_SERVER = "XRootD cache server" XROOTD_ORIGIN_SERVER = "XRootD origin server" GRIDTYPE_1 = "OSG Production Resource" diff --git a/src/webapp/models.py b/src/webapp/models.py index f1959faa86..57b90b3468 100644 --- a/src/webapp/models.py +++ b/src/webapp/models.py @@ -2,7 +2,6 @@ import datetime import logging import os -import time from typing import Dict, Set, List, Optional import yaml @@ -24,7 +23,7 @@ def time(self): from webapp import common, contacts_reader, ldap_data, mappings, project_reader, rg_reader, vo_reader -from webapp.common import readfile +from webapp.common import readfile, get_timestamp from webapp.contacts_reader import ContactsData from webapp.topology import Topology, Downtime from webapp.vos_data import VOsData @@ -33,6 +32,7 @@ def time(self): log = logging.getLogger(__name__) topology_update_summary = Summary('topology_update_seconds', 'Time spent updating the topology repo data') +topology_git_update_summary = Summary('topology_git_update_seconds', 'Time spent pulling/cloning the topology git repo') contact_update_summary = Summary('contact_update_seconds', 'Time spent updating the contact repo data') comanage_update_summary = Summary('comanage_update_seconds', 'Time spent updating the comanage LDAP data') ligo_update_summary = Summary('ligo_update_seconds', 'Time spent updating the LIGO LDAP data') @@ -52,16 +52,16 @@ def should_update(self): """Return True if we should update, either because we're past the next update time or because force_update is True. """ - return self.force_update or not self.data or time.time() > self.next_update + return self.force_update or not self.data or get_timestamp() > self.next_update def try_again(self): """Set the next update time to now + the retry delay.""" - self.next_update = time.time() + self.retry_delay + self.next_update = get_timestamp() + self.retry_delay def update(self, data): """Cache new data and set the next update time to now + the cache lifetime.""" self.data = data - self.timestamp = time.time() + self.timestamp = get_timestamp() self.next_update = self.timestamp + self.cache_lifetime self.force_update = False @@ -90,6 +90,7 @@ def __init__(self, config=None, strict=False): self.topology = CachedData(cache_lifetime=topology_cache_lifetime) self.vos_data = CachedData(cache_lifetime=topology_cache_lifetime) self.mappings = CachedData(cache_lifetime=topology_cache_lifetime) + self.topology_repo_stamp = CachedData(cache_lifetime=topology_cache_lifetime) self.topology_data_dir = config["TOPOLOGY_DATA_DIR"] self.topology_data_repo = config.get("TOPOLOGY_DATA_REPO", "") self.topology_data_branch = config.get("TOPOLOGY_DATA_BRANCH", "") @@ -153,6 +154,19 @@ def _update_topology_repo(self): return False return True + def maybe_update_topology_repo(self): + """Update the local git clone of the topology github repo if it hasn't + been updated recently (based on the cache time for self.topology_repo_stamp). + """ + if self.topology_repo_stamp.should_update(): + with topology_git_update_summary.time(): + ok = self._update_topology_repo() + if ok: + self.topology_repo_stamp.update(get_timestamp()) + else: + self.topology_repo_stamp.try_again() + return self.topology_repo_stamp.data + def _update_contacts_repo(self): if not self.config["NO_GIT"]: parent = os.path.dirname(self.config["CONTACT_DATA_DIR"]) @@ -297,11 +311,11 @@ def get_topology(self) -> Optional[Topology]: return self.topology.data - def update_topology(self): + def update_topology(self) -> None: """ - Update topology data + Update topology facility/site/ResourceGroup data """ - ok = self._update_topology_repo() + ok = self.maybe_update_topology_repo() if ok: try: self.topology.update(rg_reader.get_topology(self.topology_dir, self.get_contacts_data(), strict=self.strict)) @@ -320,7 +334,7 @@ def get_vos_data(self) -> Optional[VOsData]: """ if self.vos_data.should_update(): with topology_update_summary.time(): - ok = self._update_topology_repo() + ok = self.maybe_update_topology_repo() if ok: try: self.vos_data.update(vo_reader.get_vos_data(self.vos_dir, self.get_contacts_data(), strict=self.strict)) @@ -341,7 +355,7 @@ def get_projects(self) -> Optional[Dict]: """ if self.projects.should_update(): with topology_update_summary.time(): - ok = self._update_topology_repo() + ok = self.maybe_update_topology_repo() if ok: try: self.projects.update(project_reader.get_projects(self.projects_dir, strict=self.strict)) @@ -364,7 +378,7 @@ def get_mappings(self, strict=None) -> Optional[mappings.Mappings]: strict = self.strict if self.mappings.should_update(): with topology_update_summary.time(): - ok = self._update_topology_repo() + ok = self.maybe_update_topology_repo() if ok: try: self.mappings.update(mappings.get_mappings(indir=self.mappings_dir, strict=strict))