-
Notifications
You must be signed in to change notification settings - Fork 306
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add PhotoMaker Version 2 support (#358)
* first attempt at updating to PhotoMaker V2
* continue adding PhotoMaker V2 modules
* finishing the last few pieces for PhotoMaker V2; id_embeds need to be produced by a manual step and passed as an input file
* added a name converter for PhotoMaker V2; build ok
* more debugging underway
* failing at cuda mat_mul
* updated chunk_half to be more efficient; redo feedforward
* fixed a bug: carefully using ggml_view_4d to get chunks of a tensor; strides need to be recalculated or set properly; still failing at soft_max cuda op
* redo weight calculation and weight*v
* fixed a bug; PhotoMaker V2 is now kind of working
* add python script for face detection (PhotoMaker V2 needs it)
* updated readme for PhotoMaker
* fixed a bug causing PMV1 to crash; both V1 and V2 work
* fixed clean_input_ids for PMV2
* fixed a double counting bug in tokenize_with_trigger_token
* updated PhotoMaker readme
* removed some commented code
* improved reconstructing the class-word-free prompt
* changed reading id_embed to raw binary using the existing load tensor function; this is more efficient than using model load and also makes it easier to work with sd server
* minor clean up
--------- Co-authored-by: bssrdf <[email protected]>
- Loading branch information
Showing
11 changed files
with
845 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import os | ||
import sys | ||
|
||
import numpy as np | ||
import torch | ||
from diffusers.utils import load_image | ||
# pip install insightface==0.7.3 | ||
from insightface.app import FaceAnalysis | ||
from insightface.data import get_image as ins_get_image | ||
from safetensors.torch import save_file | ||
|
||
### | ||
# https://github.com/cubiq/ComfyUI_IPAdapter_plus/issues/165#issue-2055829543 | ||
### | ||
class FaceAnalysis2(FaceAnalysis):
    """`FaceAnalysis` variant whose `get` accepts a per-call detection size.

    NOTE: the detection model allows changing its input size between calls,
    but the stock insightface wrapper does not expose that, so people end up
    loading duplicate models for different sizes where there is no need to.
    """

    def get(self, img, max_num=0, det_size=(640, 640)):
        """Run the parent `get` on *img*, optionally overriding the input size.

        Args:
            img: image forwarded unchanged to `FaceAnalysis.get`.
            max_num: forwarded unchanged to `FaceAnalysis.get`.
            det_size: value assigned to `self.det_model.input_size` before
                detecting; pass `None` to keep the size currently set on the
                detection model.

        Returns:
            Whatever `FaceAnalysis.get` returns for `(img, max_num)`.
        """
        if det_size is None:
            # Leave the detector configured exactly as it already is.
            return super().get(img, max_num)

        # The assignment persists on the model after this call returns.
        self.det_model.input_size = det_size
        return super().get(img, max_num)
|
||
def analyze_faces(face_analysis: "FaceAnalysis2", img_data: np.ndarray, det_size=(640, 640)):
    """Detect faces in *img_data*, lowering the detection size until one is found.

    The first attempt uses *det_size*; every following attempt uses the next
    smaller square size from the fixed ladder 640, 576, ..., 320, 256.  Starting
    from an explicit size (instead of whatever size a previous call left on the
    detector) makes the result independent of the order images are processed in.

    Args:
        face_analysis: object whose `get(img, det_size=...)` returns a sequence
            of detected faces (e.g. a `FaceAnalysis2` instance).
        img_data: image passed through unchanged to `face_analysis.get`.
        det_size: `(width, height)` of the first attempt.  Pass `None` to start
            with the detector's current input size and then walk the whole
            ladder.

    Returns:
        The first non-empty result of `face_analysis.get`, or `[]` when no
        attempt detected a face.
    """
    ladder_sides = list(range(640, 256, -64)) + [256]

    if det_size is None:
        # None tells FaceAnalysis2.get to leave the detector's size untouched.
        detection_sizes = [None] + [(side, side) for side in ladder_sides]
    else:
        first_size = tuple(det_size)
        # Only fall back to sizes strictly below the requested one; this also
        # avoids re-running the identical size twice.
        detection_sizes = [first_size] + [
            (side, side) for side in ladder_sides if side < max(first_size)
        ]

    for size in detection_sizes:
        faces = face_analysis.get(img_data, det_size=size)
        if len(faces) > 0:
            return faces

    return []
|
||
# Name of the file written into the input folder, and of the tensor it holds.
ID_EMBEDS_FILENAME = "id_embeds.bin"
ID_EMBEDS_TENSOR_NAME = "id_embeds"


def list_input_images(folder):
    """Return the sorted paths of the candidate image files inside *folder*.

    Sub-directories and a previously written `id_embeds.bin` are skipped so the
    script can be re-run on the same folder without trying to decode its own
    output as an image.
    """
    return sorted(
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry != ID_EMBEDS_FILENAME and os.path.isfile(os.path.join(folder, entry))
    )


def write_id_embeds(path, id_embeds, tensor_name=ID_EMBEDS_TENSOR_NAME):
    """Write a 2-D embedding matrix to *path* as a raw binary tensor file.

    File layout (all integers are little-endian int32):
        4, len(tensor_name), 0, n_columns, n_rows, 1, 1,
        tensor_name (ASCII), row-major float32 data.

    NOTE(review): the leading 4 is presumably the number of dimensions and the
    0 the ggml type id for F32 -- confirm against the loader that reads the file.

    Args:
        path: destination file; overwritten if it exists.
        id_embeds: array-like of shape `(n_rows, n_columns)`.
        tensor_name: ASCII name stored in the header.

    Raises:
        ValueError: if *id_embeds* is not 2-dimensional.
    """
    # The header declares type 0, so force little-endian float32 regardless of
    # the dtype the embeddings arrived in.
    data = np.ascontiguousarray(id_embeds, dtype="<f4")
    if data.ndim != 2:
        raise ValueError(f"id_embeds must be 2-dimensional, got shape {data.shape}")

    n_rows, n_columns = data.shape
    name_bytes = tensor_name.encode("ascii")
    header = np.array(
        [4, len(name_bytes), 0, n_columns, n_rows, 1, 1],
        dtype="<i4",
    ).tobytes()

    with open(path, "wb") as f:
        f.write(header)
        f.write(name_bytes)
        f.write(data.tobytes())


def main(argv):
    """Compute one face embedding per image in a folder and save them.

    Args:
        argv: command line, `argv[1]` being the folder holding the ID images.
            The result is written to `<folder>/id_embeds.bin`.

    Raises:
        SystemExit: if the folder argument is missing or is not a directory.
        ValueError: if no face is detected in any of the images.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "face_detect.py"
        raise SystemExit(f"usage: {prog} <input_folder>")

    input_folder_name = argv[1]
    if not os.path.isdir(input_folder_name):
        raise SystemExit(f"not a directory: {input_folder_name}")

    # Swap in 'CUDAExecutionProvider' to run detection on the GPU.
    face_detector = FaceAnalysis2(providers=['CPUExecutionProvider'], allowed_modules=['detection', 'recognition'])
    face_detector.prepare(ctx_id=0, det_size=(640, 640))

    id_embed_list = []
    for image_path in list_input_images(input_folder_name):
        # Reverse the channel axis of the loaded image (presumably RGB -> BGR,
        # the order insightface expects -- confirm).
        img = np.array(load_image(image_path))[:, :, ::-1]
        faces = analyze_faces(face_detector, img)
        if len(faces) > 0:
            # Only the first detected face of each image is used.
            id_embed_list.append(np.asarray(faces[0]['embedding']))
        else:
            print(f"warning: no face detected in {image_path}", file=sys.stderr)

    if not id_embed_list:
        raise ValueError("No face detected in input image pool")

    write_id_embeds(
        os.path.join(input_folder_name, ID_EMBEDS_FILENAME),
        np.stack(id_embed_list),
    )


if __name__ == "__main__":
    main(sys.argv)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.