enable ipex/ov example
IlyasMoutawwakil committed Dec 10, 2024
Commit ef84a5a · 1 parent bb2a05a
Showing 13 changed files with 52 additions and 55 deletions.
15 changes: 7 additions & 8 deletions .github/workflows/test_cli_cpu_ipex.yaml
@@ -43,11 +43,10 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
 
-      # examples require specific machines
-      # - if: ${{
-      #     (github.event_name == 'push') ||
-      #     (github.event_name == 'workflow_dispatch') ||
-      #     contains( github.event.pull_request.labels.*.name, 'examples')
-      #   }}
-      #   name: Run examples
-      #   run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
+      - if: ${{
+          (github.event_name == 'push') ||
+          (github.event_name == 'workflow_dispatch') ||
+          contains( github.event.pull_request.labels.*.name, 'examples')
+        }}
+        name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
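
With the step uncommented, the example suite now runs on every push, on manual dispatch, or when a pull request carries the `examples` label (the openvino workflow below gets the identical change). To reproduce the CI selection locally, pytest can be driven from Python; a minimal sketch, assuming a checkout of this repository and its test naming:

    # Run the same example tests the CI step runs (sketch; repo checkout assumed).
    import sys
    import pytest

    if __name__ == "__main__":
        # Identical keyword filter to the workflow's `run:` line.
        sys.exit(pytest.main(["tests/test_examples.py", "-s", "-k", "cli and cpu and ipex"]))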
15 changes: 7 additions & 8 deletions .github/workflows/test_cli_cpu_openvino.yaml
@@ -44,11 +44,10 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
 
-      # examples require specific machines
-      # - if: ${{
-      #     (github.event_name == 'push') ||
-      #     (github.event_name == 'workflow_dispatch') ||
-      #     contains( github.event.pull_request.labels.*.name, 'examples')
-      #   }}
-      #   name: Run examples
-      #   run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
+      - if: ${{
+          (github.event_name == 'push') ||
+          (github.event_name == 'workflow_dispatch') ||
+          contains( github.event.pull_request.labels.*.name, 'examples')
+        }}
+        name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
4 changes: 2 additions & 2 deletions examples/cpu_ipex_bert.yaml
@@ -17,8 +17,8 @@ launcher:
 backend:
   device: cpu
   export: true
-  no_weights: false
-  torch_dtype: bfloat16
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
   model: google-bert/bert-base-uncased
 
 scenario:
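
The dtype switch above trades speed for portability: float32 runs everywhere, while bfloat16 pays off only where the hardware has fast native support. To pick `torch_dtype` programmatically instead of editing the YAML, one heuristic keys off the CPU's instruction-set capability; a sketch, assuming torch.backends.cpu.get_cpu_capability() (available in recent PyTorch) and treating AVX512 as a proxy for fast bfloat16:

    # Heuristic dtype pick for backend.torch_dtype (sketch, not part of the commit):
    # AVX512-class Intel CPUs generally have fast bfloat16 paths; older CPUs
    # emulate bfloat16, so float32 stays the safer default there.
    import torch

    def pick_cpu_dtype() -> str:
        capability = torch.backends.cpu.get_cpu_capability()  # e.g. "AVX2", "AVX512"
        return "bfloat16" if "AVX512" in capability else "float32"

    print(pick_cpu_dtype())  # paste the result into backend.torch_dtype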
10 changes: 3 additions & 7 deletions examples/cpu_ipex_llama.yaml
@@ -17,21 +17,17 @@ launcher:
 backend:
   device: cpu
   export: true
-  no_weights: false
-  torch_dtype: bfloat16
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 
 scenario:
   memory: true
   latency: true
 
-  warmup_runs: 10
-  iterations: 10
-  duration: 10
-
   input_shapes:
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
 
   generate_kwargs:
     max_new_tokens: 32
6 changes: 3 additions & 3 deletions examples/cpu_llama_cpp_embedding.yaml
@@ -17,8 +17,8 @@ backend:
 scenario:
   input_shapes:
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
 
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
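
Pinning min_new_tokens equal to max_new_tokens forces every run to decode exactly the same number of tokens, so latency and throughput figures are comparable across runs; dropping 100 to 32 simply makes the CPU job cheaper. The idea in isolation, using the transformers generate API (illustrative only: the TinyLlama checkpoint is borrowed from the ipex example above, and the benchmark forwards these kwargs to generation internally):

    # min_new_tokens == max_new_tokens => generate() emits exactly 32 new tokens.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # illustrative checkpoint
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)

    inputs = tokenizer("a benchmarking prompt", return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=32, min_new_tokens=32)
    print(outputs.shape[-1] - inputs["input_ids"].shape[-1])  # always 32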
9 changes: 4 additions & 5 deletions examples/cpu_llama_cpp_text_generation.yaml
@@ -15,10 +15,9 @@ backend:
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
 
 scenario:
+  memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
-    sequence_length: 256
-
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    sequence_length: 128
6 changes: 6 additions & 0 deletions examples/cpu_onnxruntime_static_quant_vit.yaml
@@ -18,3 +18,9 @@ backend:
   is_static: true
   per_channel: false
   calibration: true
+
+scenario:
+  memory: true
+  latency: true
+  input_shapes:
+    batch_size: 2
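
Previously this example only exercised static quantization; the new scenario block makes it measure something. In the Python API used later in this commit (examples/cuda_pytorch_bert.py), the equivalent object is an InferenceConfig; a one-line sketch, with the top-level import path assumed:

    from optimum_benchmark import InferenceConfig  # import path assumed

    # Same meaning as the YAML block above: track memory and latency, batch of 2.
    scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 2})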
5 changes: 4 additions & 1 deletion (file path not captured)
@@ -16,6 +16,9 @@ backend:
   model: google-bert/bert-base-uncased
 
 scenario:
+  memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
-    sequence_length: 16
+    sequence_length: 128
2 changes: 1 addition & 1 deletion examples/cpu_openvino_diffusion.yaml
@@ -9,10 +9,10 @@ defaults:
 name: openvino_diffusion
 
 backend:
-  half: true
   device: cpu
   export: true
   model: stabilityai/stable-diffusion-2-1
+  half: false # enable half-precision on compatible Intel CPU machines
 
 scenario:
   input_shapes:
16 changes: 5 additions & 11 deletions examples/cuda_pytorch_bert.py
@@ -8,7 +8,11 @@
 PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)
 
 
-def run_benchmark():
+if __name__ == "__main__":
+    level = os.environ.get("LOG_LEVEL", "INFO")
+    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
+    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
+
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
     backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL)
     scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})
@@ -21,16 +25,6 @@ def run_benchmark():
         log_report=True,
     )
     benchmark_report = Benchmark.launch(benchmark_config)
-
-    return benchmark_config, benchmark_report
-
-
-if __name__ == "__main__":
-    level = os.environ.get("LOG_LEVEL", "INFO")
-    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
-    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
-
-    benchmark_config, benchmark_report = run_benchmark()
     benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
 
     if PUSH_REPO_ID is not None:
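
The refactor collapses the old run_benchmark() indirection into one straight-line __main__ block: configure logging first, then build the configs, launch, and wrap the report. The hunk is cut off before the file's final lines; presumably they publish to the Hub when PUSH_REPO_ID is set. A condensed sketch of the post-commit flow, with the reconstructed parts flagged:

    # Condensed flow of examples/cuda_pytorch_bert.py after this commit
    # (imports and the truncated tail are reconstructed; hedged where noted).
    import os

    from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
    from optimum_benchmark.logging_utils import setup_logging  # import path assumed

    MODEL = "google-bert/bert-base-uncased"  # value assumed; defined in the unchanged header
    PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)

    if __name__ == "__main__":
        setup_logging(level=os.environ.get("LOG_LEVEL", "INFO"),
                      to_file=os.environ.get("LOG_TO_FILE", "0") == "1",
                      prefix="MAIN-PROCESS")

        benchmark_config = BenchmarkConfig(
            name="cuda_pytorch_bert",  # name assumed; the real value sits in cut-off context
            launcher=ProcessConfig(device_isolation=True, device_isolation_action="warn"),
            backend=PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL),
            scenario=InferenceConfig(memory=True, latency=True,
                                     input_shapes={"batch_size": 1, "sequence_length": 128}),
            log_report=True,
        )
        benchmark_report = Benchmark.launch(benchmark_config)
        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

        if PUSH_REPO_ID is not None:
            benchmark.push_to_hub(repo_id=PUSH_REPO_ID)  # assumed: the hunk ends before this line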
15 changes: 8 additions & 7 deletions examples/cuda_pytorch_bert.yaml
@@ -12,15 +12,16 @@ launcher:
   device_isolation: true
   device_isolation_action: warn
 
-backend:
-  device: cuda
-  device_ids: 0
-  no_weights: true
-  model: google-bert/bert-base-uncased
-
 scenario:
-  latency: true
   memory: true
+  latency: true
 
   input_shapes:
     batch_size: 1
     sequence_length: 128
+
+backend:
+  device: cuda
+  device_ids: 0
+  no_weights: true
+  model: bert-base-uncased
2 changes: 1 addition & 1 deletion examples/cuda_pytorch_llama_quants.py
@@ -46,7 +46,7 @@ def run_benchmark(weight_config: str):
         duration=10,
         iterations=10,
         warmup_runs=10,
-        input_shapes={"batch_size": 1, "sequence_length": 128},
+        input_shapes={"batch_size": 1, "sequence_length": 64},
         generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
     )
     benchmark_config = BenchmarkConfig(
2 changes: 1 addition & 1 deletion examples/cuda_pytorch_vlm.yaml
@@ -30,7 +30,7 @@ scenario:
   input_shapes:
     # text
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
     # image
     num_images: 2
     num_channels: 3
