diff --git a/README.md b/README.md index 0a1461905..e5f8e7cec 100644 --- a/README.md +++ b/README.md @@ -526,6 +526,16 @@ It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 +### Mar 15, 2024 / 2024/3/15: v0.8.5 + +- Fixed a bug where the timestep embedding value was incorrect during SDXL training. + - Inference with the generation script is fixed as well. + - The extent of the impact is unknown, but please update if you train SDXL models. + +- SDXL 学習時の timestep embedding の値が誤っていたのを修正しました。 + - 生成スクリプトでの推論時についてもあわせて修正しました。 + - 影響の度合いは不明ですが、SDXL の学習時にはアップデートをお願いいたします。 + ### Feb 24, 2024 / 2024/2/24: v0.8.4 - The log output has been improved. PR [#905](https://github.com/kohya-ss/sd-scripts/pull/905) Thanks to shirayu! diff --git a/library/sdxl_original_unet.py b/library/sdxl_original_unet.py index 673cf9f65..17c345a89 100644 --- a/library/sdxl_original_unet.py +++ b/library/sdxl_original_unet.py @@ -31,8 +31,10 @@ from torch.nn import functional as F from einops import rearrange from .utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) IN_CHANNELS: int = 4 @@ -1074,7 +1076,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): timesteps = timesteps.expand(x.shape[0]) hs = [] - t_emb = get_timestep_embedding(timesteps, self.model_channels) # , repeat_only=False) + t_emb = get_timestep_embedding(timesteps, self.model_channels, downscale_freq_shift=0) # , repeat_only=False) t_emb = t_emb.to(x.dtype) emb = self.time_embed(t_emb) @@ -1132,7 +1134,7 @@ def __init__(self, original_unet: SdxlUNet2DConditionModel, **kwargs): # call original model's methods def __getattr__(self, name): return getattr(self.delegate, name) - + def __call__(self, *args, **kwargs): return self.delegate(*args, 
**kwargs) @@ -1164,7 +1166,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): timesteps = timesteps.expand(x.shape[0]) hs = [] - t_emb = get_timestep_embedding(timesteps, _self.model_channels) # , repeat_only=False) + t_emb = get_timestep_embedding(timesteps, _self.model_channels, downscale_freq_shift=0) # , repeat_only=False) t_emb = t_emb.to(x.dtype) emb = _self.time_embed(t_emb)