Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[azure][feat] Rewrite metrics collection to make it better #2298

Merged
merged 14 commits into from
Dec 12, 2024
Merged
15 changes: 15 additions & 0 deletions plugins/azure/fix_plugin_azure/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
MicrosoftGraphOrganizationRoot,
)
from fix_plugin_azure.resource.monitor import resources as monitor_resources
from fix_plugin_azure.resource.metrics import AzureMetricData
from fix_plugin_azure.resource.mysql import AzureMysqlServerType, resources as mysql_resources
from fix_plugin_azure.resource.network import (
AzureNetworkExpressRoutePortsLocation,
Expand Down Expand Up @@ -159,6 +160,14 @@ def get_last_run() -> Optional[datetime]:
for after_collect in builder.after_collect_actions:
after_collect()

if builder.config.collect_usage_metrics:
try:
log.info(f"[Azure:{self.account.safe_name}] Collect usage metrics.")
self.collect_usage_metrics(builder)
builder.executor.wait_for_submitted_work()
except Exception as e:
log.warning(f"[Azure] Failed to collect usage metrics in project {self.account.safe_name}: {e}")

# connect nodes
log.info(f"[Azure:{self.account.safe_name}] Connect resources and create edges.")
for node, data in list(self.graph.nodes(data=True)):
Expand All @@ -184,6 +193,12 @@ def get_last_run() -> Optional[datetime]:
self.core_feedback.progress_done(self.account.id, 1, 1, context=[self.cloud.id])
log.info(f"[Azure:{self.account.safe_name}] Collecting resources done.")

def collect_usage_metrics(self, builder: GraphBuilder) -> None:
for resource in builder.graph.nodes:
if isinstance(resource, MicrosoftResource) and (mq := resource.collect_usage_metrics(builder)):
start_at = builder.created_at - builder.metrics_delta
AzureMetricData.query_for(builder, resource, mq, start_at, builder.created_at)

def collect_resource_list(
self, name: str, builder: GraphBuilder, resources: List[Type[MicrosoftResource]]
) -> Future[None]:
Expand Down
80 changes: 67 additions & 13 deletions plugins/azure/fix_plugin_azure/resource/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import logging
from concurrent.futures import Future
from datetime import datetime, timedelta
from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Callable, cast, Union, Set
from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Tuple, Callable, cast, Union, Set

from attr import define, field
from attrs import frozen
from azure.identity import DefaultAzureCredential

from fix_plugin_azure.azure_client import AzureResourceSpec, MicrosoftClient, MicrosoftRestSpec
Expand All @@ -20,6 +21,9 @@
BaseRegion,
ModelReference,
PhantomBaseResource,
StatName,
MetricName,
MetricUnit,
)
from fixlib.config import current_config
from fixlib.core.actions import CoreFeedback
Expand Down Expand Up @@ -187,12 +191,9 @@ def connect_in_graph(self, builder: GraphBuilder, source: Json) -> None:
# Default behavior: add resource to the namespace
pass

@classmethod
def collect_usage_metrics(
cls: Type[MicrosoftResourceType], builder: GraphBuilder, collected_resources: List[MicrosoftResourceType]
) -> None:
def collect_usage_metrics(self, builder: GraphBuilder) -> List[AzureMetricQuery]:
# Default behavior: do nothing
pass
return []

@classmethod
def collect_resources(
Expand All @@ -203,13 +204,7 @@ def collect_resources(
if spec := cls.api_spec:
try:
items = builder.client.list(spec, **kwargs)
collected = cls.collect(items, builder)
if builder.config.collect_usage_metrics:
try:
cls.collect_usage_metrics(builder, collected)
except Exception as e:
log.warning(f"Failed to collect usage metrics for {cls.__name__}: {e}")
return collected
return cls.collect(items, builder)
except Exception as e:
msg = f"Error while collecting {cls.__name__} with service {spec.service} and location: {builder.location}: {e}"
builder.core_feedback.info(msg, log)
Expand Down Expand Up @@ -1008,6 +1003,65 @@ def with_location(self, location: BaseRegion) -> GraphBuilder:
)


STAT_MAP: Dict[str, StatName] = {
"minimum": StatName.min,
"average": StatName.avg,
"maximum": StatName.max,
}


@frozen(kw_only=True)
class MetricNormalization:
unit: MetricUnit
normalize_value: Callable[[float], float] = lambda x: x


@define(hash=True, frozen=True)
class AzureMetricQuery:
metric_name: str
metric_namespace: str
metric_normalization_name: MetricName
ref_id: str
instance_id: str
metric_id: str
aggregation: Tuple[str, ...]
normalization: MetricNormalization
custom_period: Optional[timedelta] = None
custom_start_time: Optional[datetime] = None
unit: str = "Count"

@staticmethod
def create(
*,
metric_name: str,
metric_namespace: str,
metric_normalization_name: MetricName,
instance_id: str,
ref_id: str,
normalization: MetricNormalization,
aggregation: Tuple[str, ...],
unit: str = "Count",
custom_start_time: Optional[datetime] = None,
custom_period: Optional[timedelta] = None,
metric_id: Optional[str] = None,
) -> "AzureMetricQuery":
metric_id = f"{instance_id}/providers/Microsoft.Insights/metrics/{metric_name}"
# noinspection PyTypeChecker
return AzureMetricQuery(
metric_name=metric_name,
metric_namespace=metric_namespace,
metric_normalization_name=metric_normalization_name,
instance_id=instance_id,
metric_id=metric_id,
aggregation=aggregation,
ref_id=ref_id,
unit=unit,
normalization=normalization,
custom_period=custom_period,
custom_start_time=custom_start_time,
)


resources: List[Type[MicrosoftResource]] = [
AzureResourceGroup,
]
Loading
Loading