From 361c86f47234f1b8dbd0e7a39a5cbeda840d9645 Mon Sep 17 00:00:00 2001 From: Nihanth Subramanya Date: Tue, 7 Nov 2023 17:42:42 +0100 Subject: [PATCH 1/2] Fix dpm++ img2img when there are duped timesteps --- .../pipeline_stable_diffusion_xl_img2img.py | 3 ++- src/diffusers/schedulers/scheduling_dpmsolver_multistep.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 02a220fa851b..7c3bf63fbd04 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -568,7 +568,8 @@ def get_timesteps(self, num_inference_steps, strength, device, denoising_start=N else: t_start = 0 - timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :] + init_timestep_val = self.scheduler.timesteps[t_start * self.scheduler.order] + timesteps = self.scheduler.timesteps[self.scheduler.timesteps <= init_timestep_val] # Strength is irrelevant if we directly request a timestep to start at; # that is, strength is determined by the denoising_start instead. diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py index ad262aeeeeea..b12018899d8e 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -773,8 +773,8 @@ def _init_step_index(self, timestep): # is always the second index (or the last index if there is only 1) # This way we can ensure we don't accidentally skip a sigma in # case we start in the middle of the denoising schedule (e.g. 
for image-to-image) - elif len(index_candidates) > 1: - step_index = index_candidates[1].item() + # elif len(index_candidates) > 1: + # step_index = index_candidates[1].item() else: step_index = index_candidates[0].item() @@ -887,7 +887,7 @@ def add_noise( schedule_timesteps = self.timesteps.to(original_samples.device) timesteps = timesteps.to(original_samples.device) - step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + step_indices = [(schedule_timesteps == t).nonzero().tolist()[0] for t in timesteps] sigma = sigmas[step_indices].flatten() while len(sigma.shape) < len(original_samples.shape): From aaf8b4645f01999a893f461abe6b93ad7bb98738 Mon Sep 17 00:00:00 2001 From: Nihanth Subramanya Date: Thu, 9 Nov 2023 00:45:20 +0100 Subject: [PATCH 2/2] simplify --- .../pipeline_stable_diffusion_xl_img2img.py | 50 ++++++++----------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 7c3bf63fbd04..ab54e744b41f 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -561,41 +561,31 @@ def check_inputs( ) def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None): - # get the original timestep using init_timestep if denoising_start is None: - init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - t_start = max(num_inference_steps - init_timestep, 0) - else: - t_start = 0 + denoising_start = 1 - strength - init_timestep_val = self.scheduler.timesteps[t_start * self.scheduler.order] - timesteps = self.scheduler.timesteps[self.scheduler.timesteps <= init_timestep_val] + timesteps = self.scheduler.timesteps - # Strength is irrelevant if we directly request a timestep to start at; - # 
that is, strength is determined by the denoising_start instead. - if denoising_start is not None: - discrete_timestep_cutoff = int( - round( - self.scheduler.config.num_train_timesteps - - (denoising_start * self.scheduler.config.num_train_timesteps) - ) + discrete_timestep_cutoff = int( + round( + self.scheduler.config.num_train_timesteps + - (denoising_start * self.scheduler.config.num_train_timesteps) ) + ) - num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item() - if self.scheduler.order == 2 and num_inference_steps % 2 == 0: - # if the scheduler is a 2nd order scheduler we might have to do +1 - # because `num_inference_steps` might be even given that every timestep - # (except the highest one) is duplicated. If `num_inference_steps` is even it would - # mean that we cut the timesteps in the middle of the denoising step - # (between 1st and 2nd devirative) which leads to incorrect results. By adding 1 - # we ensure that the denoising process always ends after the 2nd derivate step of the scheduler - num_inference_steps = num_inference_steps + 1 - - # because t_n+1 >= t_n, we slice the timesteps starting from the end - timesteps = timesteps[-num_inference_steps:] - return timesteps, num_inference_steps - - return timesteps, num_inference_steps - t_start + num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item() + if self.scheduler.order == 2 and num_inference_steps % 2 == 0: + # if the scheduler is a 2nd order scheduler we might have to do +1 + # because `num_inference_steps` might be even given that every timestep + # (except the highest one) is duplicated. If `num_inference_steps` is even it would + # mean that we cut the timesteps in the middle of the denoising step + # (between 1st and 2nd devirative) which leads to incorrect results. 
By adding 1 + # we ensure that the denoising process always ends after the 2nd derivative step of the scheduler + num_inference_steps = num_inference_steps + 1 + + # because t_n+1 >= t_n, we slice the timesteps starting from the end + timesteps = timesteps[-num_inference_steps:] + return timesteps, num_inference_steps def prepare_latents( self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True