From df815e5880b72b527bdebb389ba7c26ec7358c35 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Wed, 29 Mar 2023 09:33:24 +0100 Subject: [PATCH 1/2] Add ability to skip some bits --- os_capacity/prometheus.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/os_capacity/prometheus.py b/os_capacity/prometheus.py index a29e217..5d9d6eb 100755 --- a/os_capacity/prometheus.py +++ b/os_capacity/prometheus.py @@ -311,6 +311,9 @@ def collect(self): print(f"Collect started {collect_id}") guages = [] + skip_project_usage = int(os.environ.get('OS_CAPACITY_SKIP_PROJECT_USAGE', "0")) + skip_host_usage = int(os.environ.get('OS_CAPACITY_SKIP_HOST_USAGE', "0")) + conn = openstack.connect() openstack.enable_logging(debug=False) try: @@ -321,19 +324,23 @@ def collect(self): host_time = time.perf_counter() host_duration = host_time - start_time - print(f"1 of 3 host flavor capacity complete for {collect_id} it took {host_duration} seconds") - - guages += get_project_usage(conn.identity, conn.placement, conn.compute) - - project_time = time.perf_counter() - project_duration = project_time - host_time - print(f"2 of 3 project usage complete for {collect_id} it took {project_duration} seconds") - - guages += get_host_usage(resource_providers, conn.placement) - - host_usage_time = time.perf_counter() - host_usage_duration = host_usage_time - project_time - print(f"3 of 3 host usage complete for {collect_id} it took {host_usage_duration} seconds") + print(f"1 of 3: host flavor capacity complete for {collect_id} it took {host_duration} seconds") + + if not skip_project_usage: + guages += get_project_usage(conn.identity, conn.placement, conn.compute) + project_time = time.perf_counter() + project_duration = project_time - host_time + print(f"2 of 3: project usage complete for {collect_id} it took {project_duration} seconds") + else: + print("2 of 3: skipping project usage") + + if not skip_project_usage: + guages += 
get_host_usage(resource_providers, conn.placement) + host_usage_time = time.perf_counter() + host_usage_duration = host_usage_time - project_time + print(f"3 of 3: host usage complete for {collect_id} it took {host_usage_duration} seconds") + else: + print("3 of 3: skipping host usage") except Exception as e: print(f"error {e}") From e4dd350418ad8c3824f71d50328cc20e9e521452 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Wed, 29 Mar 2023 09:54:45 +0100 Subject: [PATCH 2/2] Add AZ skip --- README.rst | 9 +++++++++ os_capacity/prometheus.py | 31 +++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index a275bc9..eb2ae12 100644 --- a/README.rst +++ b/README.rst @@ -45,6 +45,15 @@ Or just run via docker or similar::: -p 9000:9000 ghcr.io/stackhpc/os-capacity:e08ecb8 curl localhost:9000 + +We also have the following optional environment variables: + +* OS_CAPACITY_EXPORTER_PORT = 9000 +* OS_CAPACITY_EXPORTER_LISTEN_ADDRESS = "0.0.0.0" +* OS_CAPACITY_SKIP_AGGREGATE_LOOKUP = 0 +* OS_CAPACITY_SKIP_PROJECT_USAGE = 0 +* OS_CAPACITY_SKIP_HOST_USAGE = 0 + Here is some example output from the exporter::: # HELP openstack_free_capacity_by_flavor_total Free capacity if you fill the cloud full of each flavor diff --git a/os_capacity/prometheus.py b/os_capacity/prometheus.py index 5d9d6eb..38a756b 100755 --- a/os_capacity/prometheus.py +++ b/os_capacity/prometheus.py @@ -110,11 +110,18 @@ def get_resource_provider_info(compute_client, placement_client): raw_rps = list(placement_client.resource_providers()) + skip_aggregate_lookup = ( + int(os.environ.get("OS_CAPACITY_SKIP_AGGREGATE_LOOKUP", "0")) == 1 + ) resource_providers = {} for raw_rp in raw_rps: rp = {"uuid": raw_rp.id} resource_providers[raw_rp.name] = rp - # TODO - get aggregates + + if skip_aggregate_lookup: + # skip checking every resource provider for their aggregates + continue + response = placement_client.get( 
f"/resource_providers/{raw_rp.id}/aggregates", headers={"OpenStack-API-Version": "placement 1.19"}, @@ -311,8 +318,10 @@ def collect(self): print(f"Collect started {collect_id}") guages = [] - skip_project_usage = int(os.environ.get('OS_CAPACITY_SKIP_PROJECT_USAGE', "0")) - skip_host_usage = int(os.environ.get('OS_CAPACITY_SKIP_HOST_USAGE', "0")) + skip_project_usage = ( + int(os.environ.get("OS_CAPACITY_SKIP_PROJECT_USAGE", "0")) == 1 + ) + skip_host_usage = int(os.environ.get("OS_CAPACITY_SKIP_HOST_USAGE", "0")) == 1 conn = openstack.connect() openstack.enable_logging(debug=False) @@ -324,13 +333,17 @@ def collect(self): host_time = time.perf_counter() host_duration = host_time - start_time - print(f"1 of 3: host flavor capacity complete for {collect_id} it took {host_duration} seconds") + print( + f"1 of 3: host flavor capacity complete for {collect_id} it took {host_duration} seconds" + ) if not skip_project_usage: guages += get_project_usage(conn.identity, conn.placement, conn.compute) project_time = time.perf_counter() project_duration = project_time - host_time - print(f"2 of 3: project usage complete for {collect_id} it took {project_duration} seconds") + print( + f"2 of 3: project usage complete for {collect_id} it took {project_duration} seconds" + ) else: print("2 of 3: skipping project usage") @@ -338,7 +351,9 @@ def collect(self): guages += get_host_usage(resource_providers, conn.placement) host_usage_time = time.perf_counter() host_usage_duration = host_usage_time - project_time - print(f"3 of 3: host usage complete for {collect_id} it took {host_usage_duration} seconds") + print( + f"3 of 3: host usage complete for {collect_id} it took {host_usage_duration} seconds" + ) else: print("3 of 3: skipping host usage") except Exception as e: @@ -352,8 +367,8 @@ def collect(self): if __name__ == "__main__": kwargs = { - "port": int(os.environ.get('OS_CAPACITY_EXPORTER_PORT', 9000)), - "addr": os.environ.get('OS_CAPACITY_EXPORTER_LISTEN_ADDRESS', '0.0.0.0'), 
+ "port": int(os.environ.get("OS_CAPACITY_EXPORTER_PORT", 9000)), + "addr": os.environ.get("OS_CAPACITY_EXPORTER_LISTEN_ADDRESS", "0.0.0.0"), } prom_client.start_http_server(**kwargs)