YGu1998 committed
Commit 6bc39f3 · verified · 1 Parent(s): f9b272a

Update sid/pipeline_sid_flux.py

Files changed (1)
  1. sid/pipeline_sid_flux.py +13 -9
sid/pipeline_sid_flux.py CHANGED
@@ -45,7 +45,7 @@ from diffusers.utils import (
 )
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-from .pipeline_output import SiDPipelineOutput
+from pipeline_output import SiDPipelineOutput
 
 
 if is_torch_xla_available():
@@ -84,7 +84,6 @@ def retrieve_timesteps(
     r"""
     Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
     custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
-
     Args:
         scheduler (`SchedulerMixin`):
             The scheduler to get timesteps from.
@@ -99,7 +98,6 @@ def retrieve_timesteps(
         sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.
-
    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
@@ -147,9 +145,7 @@ class SiDFluxPipeline(
 ):
     r"""
     The Flux pipeline for text-to-image generation.
-
     Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
-
     Args:
         transformer ([`FluxTransformer2DModel`]):
             Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
@@ -342,7 +338,6 @@ class SiDFluxPipeline(
         lora_scale: Optional[float] = None,
     ):
         r"""
-
         Args:
             prompt (`str` or `List[str]`, *optional*):
                 prompt to be encoded
@@ -713,7 +708,8 @@ class SiDFluxPipeline(
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         max_sequence_length: int = 512,
-        noise_type: str = "fresh",  # 'fresh', 'ddim', 'fixed'
+        noise_type: str = "fresh",  # 'fresh', 'ddim', 'fixed',
+        time_scale=1000,
     ):
 
         height = height or self.default_sample_size * self.vae_scale_factor
@@ -773,6 +769,13 @@ class SiDFluxPipeline(
             latents,
         )
 
+
+        latents = self._unpack_latents(
+            latents,
+            height=height,
+            width=width,
+            vae_scale_factor=self.vae_scale_factor,
+        )
         # Denoising loop
         D_x = torch.zeros_like(latents).to(latents.device)
         initial_latents = latents.clone() if noise_type == "fixed" else None
@@ -838,8 +841,8 @@ class SiDFluxPipeline(
 
             flow_pred = self._unpack_latents(
                 flow_pred,
-                height=height * self.vae_scale_factor,
-                width=width * self.vae_scale_factor,
+                height=height,
+                width=width,
                 vae_scale_factor=self.vae_scale_factor,
             )
             D_x = latents - t.view(-1, 1, 1, 1) * flow_pred
@@ -857,3 +860,4 @@ class SiDFluxPipeline(
             return (image,)
 
         return SiDPipelineOutput(images=image)
+
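
Usage note: a minimal sketch of calling the pipeline with the two new `__call__` arguments introduced in this commit (`noise_type`, `time_scale`). The checkpoint path, dtype, prompt, resolution, and step count below are placeholders, and standard `diffusers`-style `from_pretrained` loading plus list indexing of the output images are assumptions, not part of the commit.

```python
# Hypothetical usage sketch; "path/to/sid-flux-checkpoint" and all sampling
# settings are placeholders chosen for illustration.
import torch

from sid.pipeline_sid_flux import SiDFluxPipeline

# Load the pipeline as a standard diffusers DiffusionPipeline (assumption).
pipe = SiDFluxPipeline.from_pretrained(
    "path/to/sid-flux-checkpoint",  # hypothetical checkpoint location
    torch_dtype=torch.bfloat16,
).to("cuda")

out = pipe(
    prompt="a photo of an astronaut riding a horse on the moon",
    height=1024,
    width=1024,
    num_inference_steps=4,   # illustrative few-step setting
    noise_type="fresh",      # new argument: 'fresh', 'ddim', or 'fixed'
    time_scale=1000,         # new argument, default 1000
)
# __call__ returns SiDPipelineOutput(images=...) when return_dict is left enabled.
out.images[0].save("sid_flux_sample.png")
```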