From 361c86f47234f1b8dbd0e7a39a5cbeda840d9645 Mon Sep 17 00:00:00 2001 From: Nihanth Subramanya Date: Tue, 7 Nov 2023 17:42:42 +0100 Subject: [PATCH 1/2] Fix dpm++ img2img when there are duped timesteps --- .../pipeline_stable_diffusion_xl_img2img.py | 3 ++- src/diffusers/schedulers/scheduling_dpmsolver_multistep.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 02a220fa851b..7c3bf63fbd04 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -568,7 +568,8 @@ def get_timesteps(self, num_inference_steps, strength, device, denoising_start=N else: t_start = 0 - timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :] + init_timestep_val = self.scheduler.timesteps[t_start * self.scheduler.order] + timesteps = self.scheduler.timesteps[self.scheduler.timesteps <= init_timestep_val] # Strength is irrelevant if we directly request a timestep to start at; # that is, strength is determined by the denoising_start instead. diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py index ad262aeeeeea..b12018899d8e 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -773,8 +773,8 @@ def _init_step_index(self, timestep): # is always the second index (or the last index if there is only 1) # This way we can ensure we don't accidentally skip a sigma in # case we start in the middle of the denoising schedule (e.g. 
for image-to-image) - elif len(index_candidates) > 1: - step_index = index_candidates[1].item() + # elif len(index_candidates) > 1: + # step_index = index_candidates[1].item() else: step_index = index_candidates[0].item() @@ -887,7 +887,7 @@ def add_noise( schedule_timesteps = self.timesteps.to(original_samples.device) timesteps = timesteps.to(original_samples.device) - step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + step_indices = [(schedule_timesteps == t).nonzero().tolist()[0] for t in timesteps] sigma = sigmas[step_indices].flatten() while len(sigma.shape) < len(original_samples.shape): From aaf8b4645f01999a893f461abe6b93ad7bb98738 Mon Sep 17 00:00:00 2001 From: Nihanth Subramanya Date: Thu, 9 Nov 2023 00:45:20 +0100 Subject: [PATCH 2/2] simplify --- .../pipeline_stable_diffusion_xl_img2img.py | 50 ++++++++----------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 7c3bf63fbd04..ab54e744b41f 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -561,41 +561,31 @@ def check_inputs( ) def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None): - # get the original timestep using init_timestep if denoising_start is None: - init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - t_start = max(num_inference_steps - init_timestep, 0) - else: - t_start = 0 + denoising_start = 1 - strength - init_timestep_val = self.scheduler.timesteps[t_start * self.scheduler.order] - timesteps = self.scheduler.timesteps[self.scheduler.timesteps <= init_timestep_val] + timesteps = self.scheduler.timesteps - # Strength is irrelevant if we directly request a timestep to start at; - # 
that is, strength is determined by the denoising_start instead. - if denoising_start is not None: - discrete_timestep_cutoff = int( - round( - self.scheduler.config.num_train_timesteps - - (denoising_start * self.scheduler.config.num_train_timesteps) - ) + discrete_timestep_cutoff = int( + round( + self.scheduler.config.num_train_timesteps + - (denoising_start * self.scheduler.config.num_train_timesteps) ) + ) - num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item() - if self.scheduler.order == 2 and num_inference_steps % 2 == 0: - # if the scheduler is a 2nd order scheduler we might have to do +1 - # because `num_inference_steps` might be even given that every timestep - # (except the highest one) is duplicated. If `num_inference_steps` is even it would - # mean that we cut the timesteps in the middle of the denoising step - # (between 1st and 2nd devirative) which leads to incorrect results. By adding 1 - # we ensure that the denoising process always ends after the 2nd derivate step of the scheduler - num_inference_steps = num_inference_steps + 1 - - # because t_n+1 >= t_n, we slice the timesteps starting from the end - timesteps = timesteps[-num_inference_steps:] - return timesteps, num_inference_steps - - return timesteps, num_inference_steps - t_start + num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item() + if self.scheduler.order == 2 and num_inference_steps % 2 == 0: + # if the scheduler is a 2nd order scheduler we might have to do +1 + # because `num_inference_steps` might be even given that every timestep + # (except the highest one) is duplicated. If `num_inference_steps` is even it would + # mean that we cut the timesteps in the middle of the denoising step + # (between 1st and 2nd devirative) which leads to incorrect results. 
By adding 1 + # we ensure that the denoising process always ends after the 2nd derivative step of the scheduler + num_inference_steps = num_inference_steps + 1 + + # because t_n+1 >= t_n, we slice the timesteps starting from the end + timesteps = timesteps[-num_inference_steps:] + return timesteps, num_inference_steps def prepare_latents( self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True