Fix unexpected pipeline dtype change when using SDXL reference community pipelines in float16 mode (#10670)

Fix unexpected pipeline dtype change when using SDXL reference community pipelines
2026-05-28 00:39:35 +08:00 · 2025-01-28 14:46:41 +01:00
parent 7b100ce589
commit 196aef5a6f
2 changed files with 14 additions and 2 deletions
--- a/examples/community/stable_diffusion_xl_controlnet_reference.py
+++ b/examples/community/stable_diffusion_xl_controlnet_reference.py
@@ -193,7 +193,8 @@ class StableDiffusionXLControlNetReferencePipeline(StableDiffusionXLControlNetPi

    def prepare_ref_latents(self, refimage, batch_size, dtype, device, generator, do_classifier_free_guidance):
        refimage = refimage.to(device=device)
-        if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+        needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+        if needs_upcasting:
            self.upcast_vae()
            refimage = refimage.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
        if refimage.dtype != self.vae.dtype:
@@ -223,6 +224,11 @@ class StableDiffusionXLControlNetReferencePipeline(StableDiffusionXLControlNetPi

        # aligning device to prevent device errors when concating it with the latent model input
        ref_image_latents = ref_image_latents.to(device=device, dtype=dtype)
+
+        # cast back to fp16 if needed
+        if needs_upcasting:
+            self.vae.to(dtype=torch.float16)
+
        return ref_image_latents

    def prepare_ref_image(
--- a/examples/community/stable_diffusion_xl_reference.py
+++ b/examples/community/stable_diffusion_xl_reference.py
@@ -139,7 +139,8 @@ def retrieve_timesteps(
 class StableDiffusionXLReferencePipeline(StableDiffusionXLPipeline):
    def prepare_ref_latents(self, refimage, batch_size, dtype, device, generator, do_classifier_free_guidance):
        refimage = refimage.to(device=device)
-        if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+        needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+        if needs_upcasting:
            self.upcast_vae()
            refimage = refimage.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
        if refimage.dtype != self.vae.dtype:
@@ -169,6 +170,11 @@ class StableDiffusionXLReferencePipeline(StableDiffusionXLPipeline):

        # aligning device to prevent device errors when concating it with the latent model input
        ref_image_latents = ref_image_latents.to(device=device, dtype=dtype)
+
+        # cast back to fp16 if needed
+        if needs_upcasting:
+            self.vae.to(dtype=torch.float16)
+
        return ref_image_latents

    def prepare_ref_image(