ssd_vgg300, ssdlite mobilenetv3, yolov8 repro
daniil-lyakhov committed Jun 21, 2024
1 parent d45a55f commit 16b3126
Showing 3 changed files with 297 additions and 26 deletions.
111 changes: 86 additions & 25 deletions examples/post_training_quantization/openvino/yolov8/main.py
@@ -25,6 +25,7 @@
import openvino.torch # noqa
import torch
from torch._export import capture_pre_autograd_graph
from torch.export import Dim # noqa
from torch.fx.passes.graph_drawer import FxGraphDrawer
from tqdm import tqdm
from ultralytics.cfg import get_cfg
@@ -35,6 +36,7 @@
from ultralytics.utils import DATASETS_DIR
from ultralytics.utils import DEFAULT_CFG
from ultralytics.utils.metrics import ConfusionMatrix
from ultralytics.utils.torch_utils import de_parallel

import nncf

@@ -53,13 +55,24 @@ def measure_time(model, example_inputs, num_iters=500):
return average_time


def validate_fx_ult_method(model: torch.nn.Module) -> Dict:
"""
Uses the ultralytics .val() method instead of a manual dataloader loop.
For some reason this shows better metrics on torch.compile'd models.
"""
yolo = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
yolo.model = model
result = yolo.val(data="coco128.yaml", batch=1, rect=False)
return result.results_dict


def validate_fx(
model: torch.nn.Module, data_loader: torch.utils.data.DataLoader, validator: Validator, num_samples: int = None
) -> Tuple[Dict, int, int]:
validator.seen = 0
validator.jdict = []
validator.stats = []
validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
# validator.seen = 0
# validator.jdict = []
# validator.stats = []
# validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
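# NOTE: with the manual reset commented out, validator state presumably
# accumulates across successive validate_fx() calls; init_metrics() in
# prepare_validation_new() initializes it only once.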
for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
break
@@ -71,7 +84,20 @@ def validate_fx(
return stats, validator.seen, validator.nt_per_class.sum()


def validate(
def print_statistics_short(stats: Dict[str, float]) -> None:
mp, mr, map50, mean_ap = (
stats["metrics/precision(B)"],
stats["metrics/recall(B)"],
stats["metrics/mAP50(B)"],
stats["metrics/mAP50-95(B)"],
)
s = ("%20s" + "%12s" * 4) % ("Class", "Precision", "Recall", "mAP@.5", "mAP@.5:.95")
print(s)
pf = "%20s" + "%12.3g" * 4 # print format
print(pf % ("all", mp, mr, map50, mean_ap))


def validate_ov(
model: ov.Model, data_loader: torch.utils.data.DataLoader, validator: Validator, num_samples: int = None
) -> Tuple[Dict, int, int]:
validator.seen = 0
@@ -105,6 +131,23 @@ def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) -
print(pf % ("all", total_images, total_objects, mp, mr, map50, mean_ap))


def prepare_validation_new(model: YOLO, data: str) -> Tuple[Validator, torch.utils.data.DataLoader]:
# custom = {"rect": True, "batch": 1} # method defaults
# rect=False forces all input images to be resized to one size
custom = {"rect": False, "batch": 1} # method defaults
args = {**model.overrides, **custom, "mode": "val"} # highest priority args on the right

validator = model._smart_load("validator")(args=args, _callbacks=model.callbacks)
stride = 32 # default stride
validator.stride = stride # used in get_dataloader() for padding
validator.data = check_det_dataset(data)
validator.init_metrics(de_parallel(model))

data_loader = validator.get_dataloader(validator.data.get(validator.args.split), validator.args.batch)

return validator, data_loader


def prepare_validation(model: YOLO, args: Any) -> Tuple[Validator, torch.utils.data.DataLoader]:
validator = model._smart_load("validator")(args)
validator.data = check_det_dataset(args.data)
@@ -236,49 +279,65 @@ def transform_fn(x):


TORCH_FX = True
MODEL_NAME = "yolov8n"


def main():
MODEL_NAME = "yolov8n"

model = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
args = get_cfg(cfg=DEFAULT_CFG)
args.data = "coco128.yaml"

# args = get_cfg(cfg=DEFAULT_CFG)
# args.data = "coco128.yaml"
# Prepare validation dataset and helper
validator, data_loader = prepare_validation(model, args)

validator, data_loader = prepare_validation_new(model, "coco128.yaml")

# Convert to OpenVINO model
if TORCH_FX:
batch = next(iter(data_loader))
batch = validator.preprocess(batch)
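# This single preprocessed batch is reused below as the example input for
# graph capture and for the latency measurements.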

fp_stats, total_images, total_objects = validate_fx(model.model, tqdm(data_loader), validator)
print("Floating-point Torch model validation results:")
print_statistics(fp_stats, total_images, total_objects)

fp32_compiled_model = torch.compile(model.model, backend="openvino")
fp32_stats, total_images, total_objects = validate_fx(fp32_compiled_model, tqdm(data_loader), validator)
print("FP32 FX model validation results:")
print_statistics(fp32_stats, total_images, total_objects)

# result = validate_fx_ult_method(fp32_compiled_model)
# print("FX FP32 model .val validation")
# print_statistics_short(result)

print("Start quantization...")
# Rebuild the model to reset the ultralytics cache
model = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
with torch.no_grad():
# fp_stats, total_images, total_object = validate(model.model, tqdm(data_loader), validator)
# print("Floating-point model validation results:")
# print_statistics(fp_stats, total_images, total_objects)
model.model.eval()
model.model(batch["img"])
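# Warm-up forward pass before export, presumably to materialize any lazy
# state in the ultralytics model.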
exported_model = capture_pre_autograd_graph(model.model, args=(batch["img"],))
# dynamic_shapes = ((None, None, Dim("H", min=1, max=29802), Dim("W", min=1, max=29802)),)
dynamic_shapes = ((None, None, None, None),)
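# In torch.export's dynamic_shapes spec, None marks a dimension as static;
# the commented variant above would instead mark H and W as dynamic within
# the given bounds.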
exported_model = capture_pre_autograd_graph(
model.model, args=(batch["img"],), dynamic_shapes=dynamic_shapes
)
quantized_model = quantize_impl(deepcopy(exported_model), data_loader, validator)

fp32_compiled_model = torch.compile(exported_model, backend="openvino")
fp32_stats, total_images, total_objects = validate_fx(fp32_compiled_model, tqdm(data_loader), validator)
# fp32_stats, total_images, total_objects = validate_fx(model.model, tqdm(data_loader), validator)
print("FP32 model validation results:")
print_statistics(fp32_stats, total_images, total_objects)
# result = validate_fx_ult_method(quantized_model)
# print("FX INT8 model .val validation")
# print_statistics_short(result)

int8_stats, total_images, total_objects = validate_fx(quantized_model, tqdm(data_loader), validator)
print("INT8 model validation results:")
print("INT8 FX model validation results:")
print_statistics(int8_stats, total_images, total_objects)

print("Start fp32 model benchmarking...")
print("Start FX fp32 model benchmarking...")
fp32_latency = measure_time(fp32_compiled_model, (batch["img"],))
print(f"fp32 latency: {fp32_latency}")
print(f"fp32 FX latency: {fp32_latency}")

print("Start int8 model benchmarking...")
print("Start FX int8 model benchmarking...")
int8_latency = measure_time(quantized_model, (batch["img"],))
print(f"int8 latency: {int8_latency}")
print(f"FX int8 latency: {int8_latency}")
print(f"Speed up: {fp32_latency / int8_latency}")
return

@@ -289,13 +348,15 @@ def main():
quantized_model_path = Path(f"{ROOT}/{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml")
ov.save_model(quantized_model, str(quantized_model_path), compress_to_fp16=False)

args = get_cfg(cfg=DEFAULT_CFG)
args.data = "coco128.yaml"
# Validate FP32 model
fp_stats, total_images, total_objects = validate(ov_model, tqdm(data_loader), validator)
fp_stats, total_images, total_objects = validate_ov(ov_model, tqdm(data_loader), validator)
print("Floating-point model validation results:")
print_statistics(fp_stats, total_images, total_objects)

# Validate quantized model
q_stats, total_images, total_objects = validate(quantized_model, tqdm(data_loader), validator)
q_stats, total_images, total_objects = validate_ov(quantized_model, tqdm(data_loader), validator)
print("Quantized model validation results:")
print_statistics(q_stats, total_images, total_objects)

126 changes: 125 additions & 1 deletion examples/post_training_quantization/torch/ssd300_vgg16/main.py
@@ -19,6 +19,7 @@
import nncf
from nncf.torch import disable_tracing

from torch._export import capture_pre_autograd_graph
import openvino as ov
import torch
import torchvision
@@ -27,7 +28,9 @@
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection.ssd import SSD
from torchvision.models.detection.ssd import GeneralizedRCNNTransform
from torchvision.transforms.functional import pil_to_tensor
from torchvision.models.detection.anchor_utils import DefaultBoxGenerator
from torch.export import Dim
from nncf.common.logging.track_progress import track
from functools import partial

@@ -118,6 +121,7 @@ def validate(model: torch.nn.Module, dataset: COCO128Dataset, device: torch.devi
metric = MeanAveragePrecision()
with torch.no_grad():
for img, target in track(dataset, description="Validating"):
print(img.shape)
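# Debug print: input resolutions vary across COCO128 samples, which
# presumably motivates the dynamic-shape experiments below.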
prediction = model(img.to(device)[None])[0]
for k in prediction:
prediction[k] = prediction[k].to(torch.device("cpu"))
@@ -135,16 +139,38 @@ def transform_fn(data_item: Tuple[torch.Tensor, Dict], device: torch.device) ->
def main():
# Download and prepare the COCO128 dataset
dataset_path = download_dataset()
# weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
# transform = weights.transforms()
weights_name = "SSD300_VGG16_Weights.DEFAULT"
transform = torchvision.models.get_weight(weights_name).transforms()
dataset = COCO128Dataset(dataset_path, lambda img, target: (transform(img), target))

# Get the pretrained ssd300_vgg16 model from torchvision.models
model = torchvision.models.get_model("ssd300_vgg16", weights=weights_name)
# model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
model.eval()

calibration_dataset = nncf.Dataset(dataset, partial(transform_fn, device=device))

inp = next(iter(calibration_dataset.get_inference_data()))
# dynamic_shapes = ((None, None, Dim("H"), Dim("W")),)
dynamic_shapes = ((None, None, None, None),)
# dynamic_shapes = ((Dim("batch"), None, None, None),)
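# As in the yolov8 example: None marks a static dimension; the commented
# variants would make H/W or the batch dimension dynamic instead.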
_ = model(inp)
# r = validate(model, dataset, device)
# print(r)
compiled_model = capture_pre_autograd_graph(model, args=(inp,), dynamic_shapes=dynamic_shapes)
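# capture_pre_autograd_graph() returns a torch.fx.GraphModule rather than a
# torch.compile()'d module, so "compiled_model" here is an exported FX graph.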
# compiled_model = torch.compile(model)
print("torch model")
r = validate(model, dataset, device)
print(f"mAP @ 0.5: {r:.3f}")
print("compiled model")
r = validate(compiled_model, dataset, device)
print(f"mAP @ 0.5: {r:.3f}")
return

# Disable NNCF tracing for some methods in order for the model to be properly traced by NNCF
disable_tracing(GeneralizedRCNNTransform.normalize)
disable_tracing(SSD.postprocess_detections)
@@ -198,5 +224,103 @@ def main():
return fp32_map, int8_map, fp32_fps, int8_fps, fp32_model_size, int8_model_size


def validate_detr(model: torch.nn.Module, dataset: COCO128Dataset, device: torch.device, processor):
model.to(device)
metric = MeanAveragePrecision()
min_h = 1000000
max_h = 0
min_w = 1000000
max_w = 0
with torch.no_grad():
for img, target in track(dataset, description="Validating"):

inputs = pil_to_tensor(img)
if inputs.shape[0] == 1:
inputs = torch.cat([inputs] * 3)
inputs = inputs[None]

inputs = processor(images=inputs, return_tensors="pt")
min_h = min(min_h, inputs["pixel_values"].shape[2])
max_h = max(max_h, inputs["pixel_values"].shape[2])
min_w = min(min_w, inputs["pixel_values"].shape[3])
max_w = max(max_w, inputs["pixel_values"].shape[3])

output = model(**inputs)
target_sizes = torch.tensor([img.size[::-1]])
prediction = processor.post_process_object_detection(output, target_sizes=target_sizes, threshold=0.9)[0]
for k in prediction:
prediction[k] = prediction[k].to(torch.device("cpu"))
metric.update([prediction], [target])
computed_metrics = metric.compute()
print(min_h, max_h, min_w, max_w)
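# The min/max extents tracked above are presumably what informed the Dim
# bounds (min=454, max=1333) chosen in main_detr() below.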
return computed_metrics["map_50"]


def get_detr_inputs(processor, dataset):
img = next(iter(dataset))[0]
inputs = pil_to_tensor(img)
inputs = inputs[None]
return processor(images=inputs, return_tensors="pt")


def get_image():
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

return image


def main_detr():
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch

device = torch.device("cpu")
# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model.eval()

dataset_path = download_dataset()
dataset = COCO128Dataset(dataset_path, lambda img, target: (img, target))

h, w = Dim("H", min=454, max=1333), Dim("W", min=748, max=1333)
dynamic_shapes = {"pixel_values": {2: h, 3: w}, "pixel_mask": {2: h, 3: w}}
dynamic_shapes = ((None, None, h, w), (None, h, w))
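# NOTE: the tuple form overwrites the dict form above; both express the same
# H/W constraints, keyed by position instead of by input name.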
ex_inputs = get_detr_inputs(processor, dataset)
# captured_model = capture_pre_autograd_graph(model, args=(), kwargs=ex_inputs, dynamic_shapes=dynamic_shapes)
# captured_model = capture_pre_autograd_graph(model, args=(tuple(ex_inputs.values()),),
# dynamic_shapes=dynamic_shapes)
# captured_model = capture_pre_autograd_graph(model, args=tuple(ex_inputs.values()))
captured_model = capture_pre_autograd_graph(model, args=tuple(ex_inputs.values()), dynamic_shapes=dynamic_shapes)
# captured_model = capture_pre_autograd_graph(model,args=(), kwargs=ex_inputs)

# compiled_model = torch.compile(model, dynamic=True)
# r = validate_detr(compiled_model, dataset, device, processor)
r = validate_detr(captured_model, dataset, device, processor)
print(f"mAP @ 0.5: {r:.3f}")
r = validate_detr(model, dataset, device, processor)
print(f"mAP @ 0.5: {r:.3f}")

outputs = model(**ex_inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
image = get_image()
processor(images=image, return_tensors="pt")
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
box = [round(i, 2) for i in box.tolist()]
print(
f"Detected {model.config.id2label[label.item()]} with confidence "
f"{round(score.item(), 3)} at location {box}"
)


if __name__ == "__main__":
main()
# main()
main_detr()