Adding latency and memory to energy star (#302)
IlyasMoutawwakil authored Dec 5, 2024
1 parent 6e6b103 commit 1704500
Showing 8 changed files with 415 additions and 280 deletions.
4 changes: 4 additions & 0 deletions optimum_benchmark/scenarios/energy_star/config.py
@@ -55,6 +55,10 @@ class EnergyStarConfig(ScenarioConfig):
     audio_column_name: str = field(default="audio", metadata={"help": "Name of the column with the audio."})

     # scenario options
+    energy: bool = field(default=True, metadata={"help": "Whether to measure energy."})
+    memory: bool = field(default=False, metadata={"help": "Whether to measure memory."})
+    latency: bool = field(default=False, metadata={"help": "Whether to measure latency."})
+
     warmup_runs: int = field(default=10, metadata={"help": "Number of warmup runs to perform before scenarioing"})

     # methods kwargs
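As a usage note, here is a minimal sketch of enabling the three toggles when constructing the config in Python. The field names come from the diff above; the dataset-related fields that EnergyStarConfig also defines are omitted, so treat this as illustrative rather than a complete configuration.

from optimum_benchmark.scenarios.energy_star.config import EnergyStarConfig

# energy is measured by default; memory and latency are the new opt-in toggles
scenario_config = EnergyStarConfig(
    memory=True,
    latency=True,
    # dataset and input-shape fields omitted for brevity
)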
323 changes: 207 additions & 116 deletions optimum_benchmark/scenarios/energy_star/scenario.py

Large diffs are not rendered by default.

327 changes: 171 additions & 156 deletions optimum_benchmark/scenarios/inference/scenario.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion optimum_benchmark/scenarios/training/scenario.py
@@ -51,7 +51,7 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
         energy_tracker = EnergyTracker(
             device=backend.config.device, backend=backend.config.name, device_ids=backend.config.device_ids
         )
-        context_stack.enter_context(energy_tracker.track(file_prefix="train"))
+        context_stack.enter_context(energy_tracker.track(task_name="train"))

         backend.train(
             training_dataset=training_dataset,
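The only change here is the keyword rename. A minimal sketch of the calling pattern, assuming the energy_tracker, backend, and training_dataset objects from the surrounding scenario code:

from contextlib import ExitStack

with ExitStack() as context_stack:
    # track() now takes task_name instead of file_prefix; the name is
    # forwarded to codecarbon and prefixes the emitted JSON report
    context_stack.enter_context(energy_tracker.track(task_name="train"))
    backend.train(training_dataset=training_dataset)  # other train() arguments elided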
22 changes: 15 additions & 7 deletions optimum_benchmark/trackers/energy.py
@@ -1,7 +1,7 @@
+import json
 import os
 from contextlib import contextmanager
 from dataclasses import asdict, dataclass
-from json import dump
 from logging import getLogger
 from typing import List, Literal, Optional, Union

@@ -148,7 +148,7 @@ def print(self):


 class EnergyTracker:
-    def __init__(self, backend: str, device: str, device_ids: Optional[Union[str, int, List[int]]] = None):
+    def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, int, List[int]]] = None):
         self.device = device
         self.backend = backend
         self.device_ids = device_ids

@@ -223,12 +223,18 @@ def __init__(self, backend: str, device: str, device_ids: Optional[Union[str, int, List[int]]] = None):
         self.gpu_energy: Optional[float] = None
         self.ram_energy: Optional[float] = None

+    def reset(self):
+        self.total_energy = None
+        self.cpu_energy = None
+        self.gpu_energy = None
+        self.ram_energy = None
+
     @contextmanager
-    def track(self, file_prefix: str = "task"):
+    def track(self, task_name: str = "task"):
         if self.is_pytorch_cuda:
             torch.cuda.synchronize()

-        self.emission_tracker.start_task()
+        self.emission_tracker.start_task(task_name=task_name)

         yield

@@ -237,16 +243,18 @@ def track(self, file_prefix: str = "task"):

         emission_data: EmissionsData = self.emission_tracker.stop_task()

-        with open(f"{file_prefix}_codecarbon.json", "w") as f:
-            LOGGER.info(f"\t\t+ Saving codecarbon emission data to {file_prefix}_codecarbon.json")
-            dump(asdict(emission_data), f, indent=4)
+        with open(f"{task_name}_codecarbon.json", "w") as f:
+            LOGGER.info(f"\t\t+ Saving codecarbon emission data to {task_name}_codecarbon.json")
+            json.dump(asdict(emission_data), f, indent=4)

         self.total_energy = emission_data.energy_consumed
         self.cpu_energy = emission_data.cpu_energy
         self.gpu_energy = emission_data.gpu_energy
         self.ram_energy = emission_data.ram_energy

     def get_energy(self) -> Energy:
+        assert self.total_energy is not None, "Energy must be tracked before calling this method"
+
         return Energy(
             unit=ENERGY_UNIT, cpu=self.cpu_energy, gpu=self.gpu_energy, ram=self.ram_energy, total=self.total_energy
         )
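Taken together, the new reset() method and the assertion in get_energy() suggest a track, read, reset cycle per task. A hedged sketch of that cycle, where run_prefill and run_decode are hypothetical workload stand-ins:

tracker = EnergyTracker(device="cuda", backend="pytorch", device_ids=0)

with tracker.track(task_name="prefill"):
    run_prefill()  # hypothetical workload

prefill_energy = tracker.get_energy()  # asserts that tracking actually ran
tracker.reset()  # clear readings before the next task

with tracker.track(task_name="decode"):
    run_decode()  # hypothetical workload

decode_energy = tracker.get_energy()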
12 changes: 12 additions & 0 deletions optimum_benchmark/trackers/latency.py
@@ -209,6 +209,10 @@ def _cpu_latency(self):
         self.end_events.append(time.perf_counter())

     def get_latency(self) -> Latency:
+        assert len(self.start_events) == len(
+            self.end_events
+        ), "Mismatched number of start and end events, get_latency() should only be called outside of track() context"
+
         if self.is_pytorch_cuda:
             torch.cuda.synchronize()

@@ -276,6 +280,10 @@ def on_step_end(self, *args, **kwargs):
         self.end_events.append(time.perf_counter())

     def get_latency(self) -> Latency:
+        assert len(self.start_events) == len(
+            self.end_events
+        ), "Mismatched number of start and end events, get_latency() should only be called outside of track() context"
+
         if self.is_pytorch_cuda:
             torch.cuda.synchronize()

@@ -404,6 +412,10 @@ def get_decode_latency(self) -> Latency:
         return Latency.from_values(latencies_list, unit=LATENCY_UNIT)

     def get_per_token_latency(self) -> Latency:
+        assert (
+            len(self.per_token_events) > 0
+        ), "No per-token events recorded, make sure to pass the PerTokenLatencyLogitsProcessor to the generate() method"
+
         if self.is_pytorch_cuda:
             torch.cuda.synchronize()
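These assertions encode a calling contract: read latencies only after the tracking context has closed, and wire the per-token processor into generation. A sketch under those assumptions, with tracker construction and model setup elided; generate() is the transformers-style API named in the assertion message:

from transformers import LogitsProcessorList

with latency_tracker.track():
    backend.forward(inputs)  # hypothetical tracked call

latency = latency_tracker.get_latency()  # start/end event counts now match

# per-token latency requires passing the processor to generate(), otherwise
# the new assertion fires on an empty per_token_events list
outputs = model.generate(**inputs, logits_processor=LogitsProcessorList([per_token_tracker]))
per_token_latency = per_token_tracker.get_per_token_latency()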
2 changes: 2 additions & 0 deletions optimum_benchmark/trackers/memory.py
@@ -258,6 +258,8 @@ def _cpu_memory(self):
         parent_connection.close()

     def get_max_memory(self):
+        assert self.max_ram_memory is not None, "Memory tracker must be run before getting the maximum memory"
+
         return Memory(
             unit=MEMORY_UNIT,
             max_ram=self.max_ram_memory,
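The memory tracker gets the same kind of guard. A minimal sketch of the expected order of operations, with tracker construction elided and the tracked call hypothetical:

with memory_tracker.track():
    backend.forward(inputs)  # hypothetical tracked call

max_memory = memory_tracker.get_max_memory()  # asserts the tracker has run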
3 changes: 3 additions & 0 deletions tests/test_energy_star.py
@@ -28,6 +28,9 @@ def test_cli_configs(config_name):
         TEST_CONFIG_DIR,
         "--config-name",
         config_name,
+        "scenario.energy=true",
+        "scenario.memory=true",
+        "scenario.latency=true",
         "scenario.num_samples=2",
         "scenario.input_shapes.batch_size=2",
     ]
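The same Hydra-style overrides can be passed when invoking the CLI outside the test suite. A hedged sketch via subprocess, where the config directory and name are placeholders:

import subprocess

subprocess.run(
    [
        "optimum-benchmark",              # the package's Hydra-based CLI entry point
        "--config-dir", "tests/configs",  # placeholder directory
        "--config-name", "my_config",     # placeholder config name
        "scenario.energy=true",
        "scenario.memory=true",
        "scenario.latency=true",
    ],
    check=True,
)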
