From f811b115ba867554e989fbe1fbbf97f8df0f7852 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Fri, 15 Mar 2024 21:05:00 +0900 Subject: [PATCH 1/2] fix sdxl timestep embedding --- README.md | 10 ++++++++++ library/sdxl_original_unet.py | 8 +++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e635e5aed..3639b7be8 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,16 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ## Change History +### Mar 15, 2024 / 2024/3/15: v0.8.5 + +- Fixed a bug where the value of the timestep embedding during SDXL training was incorrect. + - Inference with the generation script is also fixed. + - The impact is unknown, but please update for SDXL training. + +- SDXL 学習時の timestep embedding の値が誤っていたのを修正しました。 + - 生成スクリプトでの推論時についてもあわせて修正しました。 + - 影響の度合いは不明ですが、SDXL の学習時にはアップデートをお願いいたします。 + ### Feb 24, 2024 / 2024/2/24: v0.8.4 - The log output has been improved. PR [#905](https://github.com/kohya-ss/sd-scripts/pull/905) Thanks to shirayu! 
diff --git a/library/sdxl_original_unet.py b/library/sdxl_original_unet.py index 673cf9f65..17c345a89 100644 --- a/library/sdxl_original_unet.py +++ b/library/sdxl_original_unet.py @@ -31,8 +31,10 @@ from torch.nn import functional as F from einops import rearrange from .utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) IN_CHANNELS: int = 4 @@ -1074,7 +1076,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): timesteps = timesteps.expand(x.shape[0]) hs = [] - t_emb = get_timestep_embedding(timesteps, self.model_channels) # , repeat_only=False) + t_emb = get_timestep_embedding(timesteps, self.model_channels, downscale_freq_shift=0) # , repeat_only=False) t_emb = t_emb.to(x.dtype) emb = self.time_embed(t_emb) @@ -1132,7 +1134,7 @@ def __init__(self, original_unet: SdxlUNet2DConditionModel, **kwargs): # call original model's methods def __getattr__(self, name): return getattr(self.delegate, name) - + def __call__(self, *args, **kwargs): return self.delegate(*args, **kwargs) @@ -1164,7 +1166,7 @@ def forward(self, x, timesteps=None, context=None, y=None, **kwargs): timesteps = timesteps.expand(x.shape[0]) hs = [] - t_emb = get_timestep_embedding(timesteps, _self.model_channels) # , repeat_only=False) + t_emb = get_timestep_embedding(timesteps, _self.model_channels, downscale_freq_shift=0) # , repeat_only=False) t_emb = t_emb.to(x.dtype) emb = _self.time_embed(t_emb) From 443f02942cfefd6e1899849f563580508d118ce0 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Fri, 15 Mar 2024 21:35:14 +0900 Subject: [PATCH 2/2] fix doc --- README.md | 106 ------------------------------------------------------ 1 file changed, 106 deletions(-) diff --git a/README.md b/README.md index cc5aca505..927d7a42e 100644 --- a/README.md +++ b/README.md @@ -355,112 +355,6 @@ It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, 
rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 -### Working in progress - -- Colab seems to stop with log output. Try specifying `--console_log_simple` option in the training script to disable rich logging. -- `train_network.py` and `sdxl_train_network.py` are modified to record some dataset settings in the metadata of the trained model (`caption_prefix`, `caption_suffix`, `keep_tokens_separator`, `secondary_separator`, `enable_wildcard`). -- Some features are added to the dataset subset settings. - - `secondary_separator` is added to specify the tag separator that is not the target of shuffling or dropping. - - Specify `secondary_separator=";;;"`. When you specify `secondary_separator`, the part is not shuffled or dropped. See the example below. - - `enable_wildcard` is added. When set to `true`, the wildcard notation `{aaa|bbb|ccc}` can be used. See the example below. - - `keep_tokens_separator` is updated to be used twice in the caption. When you specify `keep_tokens_separator="|||"`, the part divided by the second `|||` is not shuffled or dropped and remains at the end. - - The existing features `caption_prefix` and `caption_suffix` can be used together. `caption_prefix` and `caption_suffix` are processed first, and then `enable_wildcard`, `keep_tokens_separator`, shuffling and dropping, and `secondary_separator` are processed in order. - - The examples are [shown below](#example-of-dataset-settings--データセット設定の記述例). 
- - -- Colab での動作時、ログ出力で停止してしまうようです。学習スクリプトに `--console_log_simple` オプションを指定し、rich のロギングを無効してお試しください。 -- `train_network.py` および `sdxl_train_network.py` で、学習したモデルのメタデータに一部のデータセット設定が記録されるよう修正しました(`caption_prefix`、`caption_suffix`、`keep_tokens_separator`、`secondary_separator`、`enable_wildcard`)。 -- データセットのサブセット設定にいくつかの機能を追加しました。 - - シャッフルの対象とならないタグ分割識別子の指定 `secondary_separator` を追加しました。`secondary_separator=";;;"` のように指定します。`secondary_separator` で区切ることで、その部分はシャッフル、drop 時にまとめて扱われます。詳しくは記述例をご覧ください。 - - `enable_wildcard` を追加しました。`true` にするとワイルドカード記法 `{aaa|bbb|ccc}` が使えます。詳しくは記述例をご覧ください。 - - `keep_tokens_separator` をキャプション内に 2 つ使えるようにしました。たとえば `keep_tokens_separator="|||"` と指定したとき、`1girl, hatsune miku, vocaloid ||| stage, mic ||| best quality, rating: general` とキャプションを指定すると、二番目の `|||` で分割された部分はシャッフル、drop されず末尾に残ります。 - - 既存の機能 `caption_prefix` と `caption_suffix` とあわせて使えます。`caption_prefix` と `caption_suffix` は一番最初に処理され、その後、ワイルドカード、`keep_tokens_separator`、シャッフルおよび drop、`secondary_separator` の順に処理されます。 - -#### Example of dataset settings / データセット設定の記述例: - -```toml -[general] -flip_aug = true -color_aug = false -resolution = [1024, 1024] - -[[datasets]] -batch_size = 6 -enable_bucket = true -bucket_no_upscale = true -caption_extension = ".txt" -keep_tokens_separator= "|||" -shuffle_caption = true -caption_tag_dropout_rate = 0.1 -secondary_separator = ";;;" # subset 側に書くこともできます / can be written in the subset side -enable_wildcard = true # 同上 / same as above - - [[datasets.subsets]] - image_dir = "/path/to/image_dir" - num_repeats = 1 - - # ||| の前後はカンマは不要です(自動的に追加されます) / No comma is required before and after ||| (it is added automatically) - caption_prefix = "1girl, hatsune miku, vocaloid |||" - - # ||| の後はシャッフル、drop されず残ります / After |||, it is not shuffled or dropped and remains - # 単純に文字列として連結されるので、カンマなどは自分で入れる必要があります / It is simply concatenated as a string, so you need to put commas yourself - caption_suffix = ", anime screencap ||| masterpiece, rating: general" -``` - -#### 
Example of caption, secondary_separator notation: `secondary_separator = ";;;"` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -The part `sky;;;cloud;;;day` is replaced with `sky,cloud,day` without shuffling or dropping. When shuffling and dropping are enabled, it is processed as a whole (as one tag). For example, it becomes `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (shuffled) or `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (dropped). - -#### Example of caption, enable_wildcard notation: `enable_wildcard = true` - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -`simple` or `white` is randomly selected, and it becomes `simple background` or `white background`. - -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -If you want to include `{` or `}` in the tag string, double them like `{{` or `}}` (in this example, the actual caption used for training is `{retro style}`). - -#### Example of caption, `keep_tokens_separator` notation: `keep_tokens_separator = "|||"` - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -It becomes `1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` or `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` etc. 
- - -#### キャプション記述例、secondary_separator 記法:`secondary_separator = ";;;"` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, sky;;;cloud;;;day, outdoors -``` -`sky;;;cloud;;;day` の部分はシャッフル、drop されず `sky,cloud,day` に置換されます。シャッフル、drop が有効な場合、まとめて(一つのタグとして)処理されます。つまり `vocaloid, 1girl, upper body, sky,cloud,day, outdoors, hatsune miku` (シャッフル)や `vocaloid, 1girl, outdoors, looking at viewer, upper body, hatsune miku` (drop されたケース)などになります。 - -#### キャプション記述例、ワイルドカード記法: `enable_wildcard = true` の場合 - -```txt -1girl, hatsune miku, vocaloid, upper body, looking at viewer, {simple|white} background -``` -ランダムに `simple` または `white` が選ばれ、`simple background` または `white background` になります。 - -```txt -1girl, hatsune miku, vocaloid, {{retro style}} -``` -タグ文字列に `{` や `}` そのものを含めたい場合は `{{` や `}}` のように二つ重ねてください(この例では実際に学習に用いられるキャプションは `{retro style}` になります)。 - -#### キャプション記述例、`keep_tokens_separator` 記法: `keep_tokens_separator = "|||"` の場合 - -```txt -1girl, hatsune miku, vocaloid ||| stage, microphone, white shirt, smile ||| best quality, rating: general -``` -`1girl, hatsune miku, vocaloid, microphone, stage, white shirt, best quality, rating: general` や `1girl, hatsune miku, vocaloid, white shirt, smile, stage, microphone, best quality, rating: general` などになります。 - - ### Mar 15, 2024 / 2024/3/15: v0.8.5 - Fixed a bug that the value of timestep embedding during SDXL training was incorrect.