Merge pull request #166 from NexaAI/david/power-group
nexa_perf for memory, latency and energy evaluation initial checkin
zhiyuan8 authored Oct 16, 2024
2 parents b1a1479 + 97b0fd3 commit e98f3cd
Showing 21 changed files with 2,558 additions and 35 deletions.
40 changes: 21 additions & 19 deletions nexa/cli/entry.py
@@ -208,7 +208,7 @@ def run_onnx_inference(args):

 def run_eval_tasks(args):
     try:
-        if 'do-not-answer' in args.tasks:
+        if args.tasks and 'do-not-answer' in args.tasks:
             if not os.getenv('OPENAI_API_KEY'):
                 print("Warning: The 'do-not-answer' task requires an OpenAI API key.")
                 print("Please set your API key in the terminal using the following command:")
@@ -221,9 +221,13 @@ def run_eval_tasks(args):

         from nexa.eval.nexa_eval import NexaEval
         evaluator = NexaEval(model_path, args.tasks, args.limit, args.port, args.nctx)
-        evaluator.run_evaluation()
+        if not args.tasks:
+            evaluator.run_perf_eval(args.device, args.new_tokens)
+        else:
+            evaluator.run_evaluation()
     except Exception as e:
-        print(f"Error running evaluation, please run: pip install nexaai[eval]")
+        print("Please run: pip install nexaai[eval]")
+        print(f"Error running evaluation: {e}")
         return

 def run_embedding_generation(args):
@@ -257,18 +261,8 @@ def run_embedding_generation(args):
         print("Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation")

 def main():
-    parser = argparse.ArgumentParser(
-        description="Nexa CLI tool for handling various model operations."
-    )
-
-    parser.add_argument(
-        "-V",
-        "--version",
-        action="version",
-        version=__version__,
-        help="Show the version of the Nexa SDK.",
-    )
-
+    parser = argparse.ArgumentParser(description="Nexa CLI tool for handling various model operations.")
+    parser.add_argument("-V", "--version", action="version", version=__version__, help="Show the version of the Nexa SDK.")
     subparsers = parser.add_subparsers(dest="command", help="sub-command help")

     # Run command
@@ -371,10 +365,18 @@ def main():
     # Benchmark Evaluation
     eval_parser = subparsers.add_parser("eval", help="Evaluate models on specified tasks.")
     eval_parser.add_argument("model_path", type=str, help="Path or identifier for the model in Nexa Model Hub")
-    eval_parser.add_argument("--tasks", type=str, required=True, help="Tasks to evaluate the model on, separated by commas.")
-    eval_parser.add_argument("--limit", type=float, help="Limit the number of examples per task. If <1, limit is a percentage of the total number of examples.", default=None)
-    eval_parser.add_argument("--port", type=int, help="Port to bind the server to", default=8300)
-    eval_parser.add_argument("--nctx", type=int, help="Length of context window", default=4096)
+
+    # General evaluation options
+    general_eval_group = eval_parser.add_argument_group('General evaluation options')
+    general_eval_group.add_argument("--tasks", type=str, help="Tasks to evaluate the model on, separated by commas.")
+    general_eval_group.add_argument("--limit", type=float, help="Limit the number of examples per task. If <1, limit is a percentage of the total number of examples.", default=None)
+    general_eval_group.add_argument("--port", type=int, help="Port to bind the server to", default=8300)
+    general_eval_group.add_argument("--nctx", type=int, help="Length of context window", default=4096)
+
+    # Performance evaluation options
+    perf_eval_group = eval_parser.add_argument_group('Performance evaluation options')
+    perf_eval_group.add_argument("--device", type=str, help="Device to run performance evaluation on, choose from 'cpu', 'cuda', 'mps'", default="cpu")
+    perf_eval_group.add_argument("--new_tokens", type=int, help="Number of new tokens to evaluate", default=100)

     # Embed command
     embed_parser = subparsers.add_parser("embed", help="Generate embeddings for a given prompt.")
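The argument groups above give the eval subcommand two modes: pass --tasks to run the usual benchmark evaluation, or omit it to trigger the new latency/memory/energy measurement. A rough sketch of both invocations, assuming the CLI entry point is installed as `nexa` and using placeholder model and task identifiers:

    # accuracy evaluation on named tasks (existing behavior)
    nexa eval <model_path> --tasks <tasks> --limit 0.5 --nctx 4096

    # performance evaluation (latency, memory, energy) when --tasks is omitted
    nexa eval <model_path> --device cpu --new_tokens 100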
64 changes: 56 additions & 8 deletions nexa/eval/nexa_eval.py
@@ -11,14 +11,21 @@
 from contextlib import ExitStack
 from nexa.eval import evaluator
 from nexa.eval.nexa_task.task_manager import TaskManager
-from nexa.eval.utils import make_table, simple_parse_args_string, handle_non_serializable
+from nexa.eval.utils import make_table, handle_non_serializable
 from nexa.gguf.server.nexa_service import run_nexa_ai_service as NexaServer
 from nexa.constants import NEXA_MODEL_EVAL_RESULTS_PATH, NEXA_RUN_MODEL_MAP
+from nexa.eval.nexa_perf import (
+    Benchmark,
+    BenchmarkConfig,
+    InferenceConfig,
+    ProcessConfig,
+    NexaConfig,
+)

 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

 class NexaEval:
-    def __init__(self, model_path: str, tasks: str, limit: float = None, port: int = None, nctx: int = None):
+    def __init__(self, model_path: str, tasks: str = None, limit: float = None, port: int = None, nctx: int = None):
         model_path = NEXA_RUN_MODEL_MAP.get(model_path, model_path)
         self.model_path = model_path

@@ -31,7 +38,9 @@ def __init__(self, model_path: str, tasks: str, limit: float = None, port: int =
         self.initial_port = port if port is not None else 8300
         self.port = self.initial_port
         self.server_url = f"http://0.0.0.0:{self.port}"
-        output_path = Path(NEXA_MODEL_EVAL_RESULTS_PATH) / self.model_name / self.model_tag / self.tasks.replace(',', '_')
+        output_path = Path(NEXA_MODEL_EVAL_RESULTS_PATH) / self.model_name / self.model_tag
+        if self.tasks:
+            output_path = output_path / self.tasks.replace(',', '_')
         self.eval_args = {
             "model": self.model_path,
             "tasks": self.tasks,
@@ -181,23 +190,62 @@ def run_evaluation(self):
         except Exception as e:
             logging.error(f"An error occurred during evaluation: {e}")
-
+    def run_perf_eval(self, device: str, new_tokens: int):
+        BENCHMARK_NAME = f"nexa_sdk_{self.model_path}"
+        launcher_config = ProcessConfig()
+        backend_config = NexaConfig(
+            device=device,
+            model=self.model_path,
+            task="text-generation",
+        )
+        scenario_config = InferenceConfig(
+            latency=True,
+            memory=True,
+            energy=True,
+            input_shapes={
+                "batch_size": 1, # TODO: make it dynamic, hardcoded to 1 for now
+                "sequence_length": 256,
+                "vocab_size": 32000,
+            },
+            generate_kwargs={
+                "max_new_tokens": new_tokens,
+                "min_new_tokens": new_tokens,
+            },
+        )
+        benchmark_config = BenchmarkConfig(
+            name=BENCHMARK_NAME,
+            launcher=launcher_config,
+            backend=backend_config,
+            scenario=scenario_config,
+        )
+
+        # Launch the benchmark with the specified configuration
+        benchmark_report = Benchmark.launch(benchmark_config)
+        benchmark_report.save_csv(f"benchmark_report_{self.model_path}.csv")
+
+
     def stop_server(self):
         if self.server_process:
             self.server_process.terminate()
             self.server_process.join()
             logging.info("Server process terminated")

-def run_eval_inference(model_path: str, tasks: str, limit: float = None, port: int = None, nctx: int = None):
+def run_eval_inference(model_path: str, tasks: str = None, limit: float = None, port: int = None, nctx: int = None, device: str = "cpu", new_tokens: int = 100):
     evaluator = NexaEval(model_path, tasks, limit, port, nctx)
-    evaluator.run_evaluation()
+    if not tasks:
+        evaluator.run_perf_eval(device, new_tokens)
+    else:
+        evaluator.run_evaluation()

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run Nexa Model Evaluation")
     parser.add_argument("model_path", type=str, help="Path or identifier for the model in Nexa Model Hub")
-    parser.add_argument("--tasks", type=str, help="Tasks to evaluate, comma-separated")
+    parser.add_argument("--tasks", type=str, help="Tasks to evaluate, comma-separated", default=None)
     parser.add_argument("--limit", type=float, help="Limit the number of examples per task. If <1, limit is a percentage of the total number of examples.", default=None)
     parser.add_argument("--port", type=int, help="Initial port to bind the server to", default=8300)
     parser.add_argument("--nctx", type=int, help="Length of context window", default=4096)

+    parser.add_argument("--device", type=str, help="Device to run the inference on, choose from 'cpu', 'cuda', 'mps'", default="cpu")
+    parser.add_argument("--new_tokens", type=int, help="Number of new tokens to evaluate", default=100)
+
     args = parser.parse_args()
-    run_eval_inference(args.model_path, args.tasks, args.limit, args.port, args.nctx)
+    run_eval_inference(args.model_path, args.tasks, args.limit, args.port, args.nctx, args.device, args.new_tokens)
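With tasks now optional end to end, the same two modes are reachable from Python. A minimal sketch based on the signatures above; the model and task identifiers are placeholders, not real entries from Nexa Model Hub:

    from nexa.eval.nexa_eval import NexaEval

    # No tasks: run the new performance evaluation (latency, memory, energy)
    evaluator = NexaEval("<model_path>")
    evaluator.run_perf_eval(device="cpu", new_tokens=100)  # writes benchmark_report_<model_path>.csv

    # With tasks: run the existing accuracy evaluation
    evaluator = NexaEval("<model_path>", tasks="<tasks>", limit=0.5)
    evaluator.run_evaluation()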
24 changes: 24 additions & 0 deletions nexa/eval/nexa_perf/__init__.py
@@ -0,0 +1,24 @@
+from .nexa_backend import NexaConfig
+from .perf_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport
+from .process_launcher import ProcessConfig
+from .inference_scenario import InferenceConfig
+from .energy_tracker import EnergyTracker, Efficiency, Energy
+from .latency_tracker import LatencyTracker, Throughput, Latency
+from .memory_tracker import MemoryTracker, Memory
+
+__all__ = [
+    "Benchmark",
+    "BenchmarkConfig",
+    "BenchmarkReport",
+    "InferenceConfig",
+    "ProcessConfig",
+    "NexaConfig",
+    "EnergyTracker",
+    "Efficiency",
+    "Energy",
+    "LatencyTracker",
+    "Throughput",
+    "Latency",
+    "MemoryTracker",
+    "Memory",
+]
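The package re-exports the config and tracker classes, so a perf run can also be assembled outside of NexaEval. A minimal standalone sketch mirroring run_perf_eval above; the model path and report filename are placeholders:

    from nexa.eval.nexa_perf import (
        Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, NexaConfig,
    )

    # Same shape as NexaEval.run_perf_eval: backend + scenario + launcher -> benchmark config
    backend = NexaConfig(device="cpu", model="<model_path>", task="text-generation")
    scenario = InferenceConfig(
        latency=True, memory=True, energy=True,
        input_shapes={"batch_size": 1, "sequence_length": 256, "vocab_size": 32000},
        generate_kwargs={"max_new_tokens": 100, "min_new_tokens": 100},
    )
    config = BenchmarkConfig(name="nexa_perf_demo", launcher=ProcessConfig(), backend=backend, scenario=scenario)

    # launch the benchmark with the assembled configuration and export the metrics
    report = Benchmark.launch(config)
    report.save_csv("benchmark_report.csv")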