# [GitHub page notice captured with the paste; not part of the script] You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import torch
import copy
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from diffusers import StableDiffusionPipeline
from scipy.cluster.vq import vq, kmeans2
# Configuration

# Checkpoint locations: input model and quantized output.
model_path = "path/to/your/sd-v1-5-fp16.safetensors"
output_path = "path/to/your/quantized_model.safetensors"

# Bit-widths that will be tested for every layer.
bits = list(range(1, 5))

# Knobs forwarded to determine_mixed_precision().
sensitivity_threshold, size_factor = 0.05, 0.5
clip_thresholds = [0.9, 0.95, 0.98]

# Number of time steps handed to precompute_time_embeddings().
time_steps = 50
# Mean Squared Error (MSE) helper
def calculate_mse(image1, image2):
    """Return the mean squared error (MSE) between two images or batches.

    Both inputs are promoted to floating point before subtracting. Without
    that, integer inputs such as uint8 pixel arrays wrap around on
    subtraction/squaring, and integer tensors cannot be averaged at all.

    Args:
        image1: First image or batch; a ``torch.Tensor`` or anything
            ``numpy.asarray`` accepts.
        image2: Second image or batch, broadcast-compatible with ``image1``.

    Returns:
        A 0-dim ``torch.Tensor`` when either input is a tensor, otherwise a
        NumPy float64 scalar.
    """
    if torch.is_tensor(image1) or torch.is_tensor(image2):
        first = torch.as_tensor(image1)
        second = torch.as_tensor(image2)
        # Keep double precision if the caller supplied it; otherwise float32
        # is enough and also avoids float16 overflow when squaring.
        wide = torch.float64 in (first.dtype, second.dtype)
        work_dtype = torch.float64 if wide else torch.float32
        return ((first.to(work_dtype) - second.to(work_dtype)) ** 2).mean()

    first = np.asarray(image1, dtype=np.float64)
    second = np.asarray(image2, dtype=np.float64)
    return ((first - second) ** 2).mean()
# CLIP score helper
def calculate_clip_score(images, texts, clip_processor, clip_model):
    """Return the mean CLIP score of generated images against their prompts.

    Args:
        images: Images passed to ``clip_processor`` as ``images=``.
        texts: Prompts passed to ``clip_processor`` as ``text=``.
        clip_processor: Callable that turns ``text``/``images`` into the
            keyword inputs of ``clip_model`` (called with
            ``return_tensors="pt", padding=True``).
        clip_model: Callable whose output exposes ``logits_per_image``.

    Returns:
        The mean of the diagonal of ``logits_per_image`` as a Python float,
        i.e. image ``i`` is scored against text ``i``.
    """
    inputs = clip_processor(text=texts, images=images, return_tensors="pt", padding=True)
    # Scoring is inference only: skip autograd bookkeeping so no graph (and
    # no activation memory) is kept alive for the CLIP forward pass.
    with torch.no_grad():
        outputs = clip_model(**inputs)
    return outputs.logits_per_image.diag().mean().item()
# Per-layer quantization sensitivity analysis
def analyze_layer_sensitivity(model, prompts, bits, sample_size=100):
    """Measure how quantizing each weight, one at a time, changes the output.

    For every parameter whose name contains ``"weight"`` and every bit-width
    in ``bits``, a deep copy of ``model`` is made, that single layer is
    quantized with ``quantize_layer``, images are generated, and the result
    is compared with images from the unmodified model.

    Args:
        model: Model whose ``named_parameters()`` are analysed.
        prompts: Prompts forwarded to ``generate_images`` and to the CLIP
            scorer.
        bits: Iterable of bit-widths to try for each layer.
        sample_size: Number of samples forwarded to ``generate_images``.

    Returns:
        ``{param_name: {bit_width: {"mse", "clip_score", "parameters"}}}``.
        Each layer additionally gets a ``32`` entry describing the
        unquantized baseline (``mse`` 0.0), which is the key
        ``determine_mixed_precision`` reads as its reference.
    """
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

    # The reference images do not depend on the layer or bit-width under
    # test, so generate and score them once instead of once per combination.
    reference_images = generate_images(model, prompts, sample_size)
    reference_clip = calculate_clip_score(
        reference_images, prompts, clip_processor, clip_model
    )

    results = {}
    for name, param in model.named_parameters():
        if "weight" not in name:
            continue

        # quantize_layer looks layers up in named_modules(), whose keys are
        # module names ("block.conv"), not parameter names
        # ("block.conv.weight"); strip the trailing parameter leaf.
        owner, _, leaf = name.rpartition(".")
        target = owner if leaf == "weight" else name

        layer_results = {}
        for b in bits:
            quantized_model = copy.deepcopy(model)
            quantize_layer(quantized_model, target, b)
            generated_images = generate_images(quantized_model, prompts, sample_size)
            layer_results[b] = {
                "mse": calculate_mse(generated_images, reference_images),
                "clip_score": calculate_clip_score(
                    generated_images, prompts, clip_processor, clip_model
                ),
                "parameters": param.numel(),
            }
            # Drop the copy before the next deepcopy to keep peak memory down.
            del quantized_model

        # Unquantized reference entry; never overrides a real measurement.
        layer_results.setdefault(
            32,
            {"mse": 0.0, "clip_score": reference_clip, "parameters": param.numel()},
        )
        results[name] = layer_results
    return results
# Lloyd-Max quantization of a single layer via SciPy's k-means
def quantize_layer(model, layer_name, bits):
    """Quantize one layer's weights in place to ``2**bits`` Lloyd-Max levels.

    The weights are clustered with ``scipy.cluster.vq.kmeans2`` and each
    weight is replaced by its cluster centroid. The centroids are stored
    directly as real values in the layer's original dtype and device, so the
    layer's regular ``forward`` keeps working unchanged. No dequantization
    step is installed.

    Args:
        model: PyTorch module containing the layer to quantize.
        layer_name: Name of the layer as listed by ``model.named_modules()``.
            A parameter name ending in ``.weight`` is also accepted and is
            resolved to the module that owns it.
        bits: Number of bits; the layer ends up with at most ``2**bits``
            distinct weight values.

    Raises:
        ValueError: If ``bits`` is smaller than 1.
        KeyError: If ``layer_name`` matches no module.

    Side effects:
        Overwrites ``layer.weight.data`` and sets ``layer.quantization_scale``
        and ``layer.quantization_zero_point`` (0-dim tensors describing the
        uniform min/max grid of the original weights; informational only).
    """
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")

    # Look the layer up by module name, falling back to "<module>.weight".
    modules = dict(model.named_modules())
    layer = modules.get(layer_name)
    if layer is None:
        owner, _, leaf = layer_name.rpartition(".")
        if leaf == "weight":
            layer = modules.get(owner)
    if layer is None:
        raise KeyError(layer_name)

    # Make sure the layer actually carries weights (the attribute may exist
    # but be None).
    weight = getattr(layer, "weight", None)
    if weight is None:
        print(f"Layer {layer_name} tidak memiliki bobot.")
        return

    # Skip layers whose weights are not floating point.
    if not weight.is_floating_point():
        print(f"Layer {layer_name} sudah memiliki bobot integer.")
        return

    # kmeans2 only handles float32/float64 data, so half-precision weights
    # are widened on the CPU for clustering.
    flat = (
        weight.data.detach()
        .to(device="cpu", dtype=torch.float32)
        .numpy()
        .reshape(-1, 1)
    )
    if flat.size == 0:
        return

    levels = 2 ** bits
    # With no more weights than levels every weight can keep its own value
    # (lossless); kmeans2's 'points' init would also fail in that case
    # because it samples `levels` distinct observations.
    if flat.size > levels:
        centroids, labels = kmeans2(flat, levels, minit='points')
        quantized = centroids[labels].reshape(tuple(weight.shape))
        # Cast back so the layer keeps its original dtype and device.
        weight.data = torch.from_numpy(quantized).to(
            device=weight.device, dtype=weight.dtype
        )

    # Scale / zero point of a uniform grid over the original weight range.
    low = float(flat.min())
    high = float(flat.max())
    steps = max(levels - 1, 1)
    # A constant weight tensor has zero range; fall back to a unit scale
    # rather than dividing by zero.
    scale = (high - low) / steps if high > low else 1.0
    zero_point = float(round(-low / scale))
    layer.quantization_scale = torch.tensor(scale, device=weight.device)
    layer.quantization_zero_point = torch.tensor(zero_point, device=weight.device)
# Image generation (placeholder)
def generate_images(model, prompts, sample_size):
    """Generate images with a Stable Diffusion model (not implemented yet).

    Args:
        model: Model to generate with.
        prompts: Text prompts to condition generation on.
        sample_size: Number of samples requested by the caller.

    Raises:
        NotImplementedError: Always. The generation logic has not been
            written. Failing loudly here beats returning ``None``, which
            would only surface later as an unrelated error in the metrics.
    """
    # TODO: implement image generation with the diffusers pipeline, using
    # `model` and `prompts` as inputs.
    raise NotImplementedError(
        "generate_images is a placeholder: implement image generation with the "
        "diffusers pipeline before running the sensitivity analysis."
    )
# Mixed-precision strategy selection
def determine_mixed_precision(results, sensitivity_threshold, size_factor, clip_thresholds):
    """Pick a bit-width for every layer from the sensitivity results.

    Args:
        results: ``{layer_name: {bit_width: {"mse", "clip_score",
            "parameters"}}}``. An optional ``32`` entry per layer is treated
            as the unquantized baseline and is never a candidate bit-width.
        sensitivity_threshold: Upper bound on the size-normalised MSE score;
            above it the widest tested bit-width is used.
        size_factor: Exponent applied to the parameter count when
            normalising the MSE (``mse / parameters ** size_factor``).
        clip_thresholds: Quantile levels (0..1). A layer whose CLIP-score
            drop exceeds the quantile at position ``i`` gets ``i + 1`` extra
            bits; the highest matching quantile wins.

    Returns:
        ``{layer_name: bit_width}`` for every layer that has at least one
        tested bit-width.
    """
    baseline_bits = 32

    # Candidate bit-widths come from the measurements themselves rather than
    # from a module-level global, so the function is self-contained.
    candidates = {
        name: sorted(b for b in layer_results if b != baseline_bits)
        for name, layer_results in results.items()
    }

    # CLIP-score drop of each layer at its widest tested bit-width, measured
    # as baseline minus quantized so that a positive value means "got worse".
    # Layers without a baseline entry simply skip the CLIP adjustment.
    clip_drops = {}
    for name, layer_results in results.items():
        baseline = layer_results.get(baseline_bits)
        widths = candidates[name]
        if baseline is not None and widths:
            clip_drops[name] = (
                baseline["clip_score"] - layer_results[widths[-1]]["clip_score"]
            )

    # Cut-offs are quantiles of the drops across layers (same quantity as the
    # value compared against them). They are checked from the highest
    # quantile down so the larger bumps are reachable.
    bumps = []
    if clip_drops:
        drop_values = list(clip_drops.values())
        ranked = sorted(enumerate(clip_thresholds), key=lambda item: item[1], reverse=True)
        bumps = [(index + 1, np.quantile(drop_values, level)) for index, level in ranked]

    mixed_precision = {}
    for name, layer_results in results.items():
        widths = candidates[name]
        if not widths:
            continue

        sensitivity_scores = {
            b: layer_results[b]["mse"] / (layer_results[b]["parameters"] ** size_factor)
            for b in widths
        }
        # NOTE(review): this picks the bit-width with the lowest score, which
        # is usually the widest one. If the intent is "smallest bit-width
        # under the threshold", this selection needs to change - confirm.
        optimal_bits = min(widths, key=sensitivity_scores.__getitem__)
        if sensitivity_scores[optimal_bits] > sensitivity_threshold:
            optimal_bits = widths[-1]  # fall back to the widest tested bit-width

        drop = clip_drops.get(name)
        if drop is not None:
            for extra_bits, cutoff in bumps:
                if drop > cutoff:
                    optimal_bits += extra_bits
                    break

        mixed_precision[name] = optimal_bits
    return mixed_precision
# Whole-model quantization
def quantize_model(model, mixed_precision):
    """Quantize ``model`` in place according to a mixed-precision plan.

    Args:
        model: Model whose layers are quantized.
        mixed_precision: ``{parameter_name: bit_width}``; only parameters
            whose name contains ``"weight"`` and that appear in this mapping
            are quantized.

    Returns:
        The same ``model`` object, after quantization.
    """
    # Collect the names first so the model is not being walked while its
    # layers are modified.
    selected = [
        name
        for name, _ in model.named_parameters()
        if "weight" in name and name in mixed_precision
    ]
    for name in selected:
        # quantize_layer looks layers up in named_modules(), so hand it the
        # owning module's name ("block.conv") rather than the parameter name
        # ("block.conv.weight").
        owner, _, leaf = name.rpartition(".")
        target = owner if leaf == "weight" else name
        quantize_layer(model, target, mixed_precision[name])
    return model
# Time-embedding pre-computation and caching
def precompute_time_embeddings(model, time_steps):
    """Compute the time embedding for steps ``0 .. time_steps - 1``.

    Args:
        model: Object exposing ``time_embedding``. When it also exposes
            ``time_proj`` (as the diffusers UNet does), the step is passed
            through ``time_proj`` first, because ``time_embedding`` then
            expects the projected features rather than the raw step index.
        time_steps: Number of consecutive steps, starting at 0, to embed.

    Returns:
        ``{step: embedding}`` with one entry per step.
    """
    embed = model.time_embedding
    project = getattr(model, "time_proj", None)

    # Use one of the embedding layer's parameters as the device/dtype
    # reference so the inputs match wherever the model lives.
    reference = None
    if isinstance(embed, torch.nn.Module):
        reference = next(embed.parameters(), None)

    time_embeddings = {}
    # Pure inference: cached values must not hold on to an autograd graph.
    with torch.no_grad():
        for t in range(time_steps):
            step = torch.tensor([t])
            if reference is not None:
                step = step.to(reference.device)
            if project is not None:
                step = project(step)
                if reference is not None:
                    # Match the embedding layer's dtype after projecting.
                    step = step.to(reference.dtype)
            time_embeddings[t] = embed(step)
    return time_embeddings
# Load the Stable Diffusion model.
# A single checkpoint file (.safetensors / .ckpt) has to go through
# from_single_file(); from_pretrained() expects a model directory or hub id.
if model_path.endswith((".safetensors", ".ckpt")):
    pipe = StableDiffusionPipeline.from_single_file(model_path, torch_dtype=torch.float16)
else:
    pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
model = pipe.unet

# Layer sensitivity analysis (generate_images must be implemented first).
prompts = ["A photo of a cat"]  # Replace with the prompts you want to use
results = analyze_layer_sensitivity(model, prompts, bits)

# Decide the mixed-precision strategy.
mixed_precision = determine_mixed_precision(
    results, sensitivity_threshold, size_factor, clip_thresholds
)

# Apply the quantization to the model.
quantized_model = quantize_model(model, mixed_precision)

# Pre-compute and cache the time embeddings.
time_embeddings = precompute_time_embeddings(quantized_model, time_steps)

# Save the quantized model.
# NOTE(review): torch.save writes PyTorch's pickle-based format even though
# output_path ends in ".safetensors"; switch to a safetensors writer or
# rename the output file - confirm which is intended.
torch.save(quantized_model.state_dict(), output_path)
# [GitHub page notice captured with the paste; not part of the script] The text was updated successfully, but these errors were encountered: