Update sid/pipeline_sid_flux.py
sid/pipeline_sid_flux.py  (changed: +13 -9)
@@ -45,7 +45,7 @@ from diffusers.utils import (
)
from diffusers.utils.torch_utils import randn_tensor
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-from
+from pipeline_output import SiDPipelineOutput


if is_torch_xla_available():

@@ -84,7 +84,6 @@ def retrieve_timesteps(
    r"""
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
-
    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.

@@ -99,7 +98,6 @@ def retrieve_timesteps(
        sigmas (`List[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.
-
    Returns:
        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.

@@ -147,9 +145,7 @@ class SiDFluxPipeline(
):
    r"""
    The Flux pipeline for text-to-image generation.
-
    Reference: https://blackforestlabs.ai/announcing-black-forest-labs/
-
    Args:
        transformer ([`FluxTransformer2DModel`]):
            Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.

@@ -342,7 +338,6 @@ class SiDFluxPipeline(
        lora_scale: Optional[float] = None,
    ):
        r"""
-
        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded

@@ -713,7 +708,8 @@ class SiDFluxPipeline(
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
        max_sequence_length: int = 512,
-        noise_type: str = "fresh", # 'fresh', 'ddim', 'fixed'
+        noise_type: str = "fresh", # 'fresh', 'ddim', 'fixed',
+        time_scale=1000,
    ):

        height = height or self.default_sample_size * self.vae_scale_factor

@@ -773,6 +769,13 @@
            latents,
        )

+
+        latents = self._unpack_latents(
+            latents,
+            height=height,
+            width=width,
+            vae_scale_factor=self.vae_scale_factor,
+        )
        # Denoising loop
        D_x = torch.zeros_like(latents).to(latents.device)
        initial_latents = latents.clone() if noise_type == "fixed" else None

@@ -838,8 +841,8 @@

                flow_pred = self._unpack_latents(
                    flow_pred,
-                    height=height
-                    width=width
+                    height=height,
+                    width=width,
                    vae_scale_factor=self.vae_scale_factor,
                )
                D_x = latents - t.view(-1, 1, 1, 1) * flow_pred

@@ -857,3 +860,4 @@
            return (image,)

        return SiDPipelineOutput(images=image)
+
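The commit's user-facing change is two new keyword arguments on `SiDFluxPipeline.__call__`: `noise_type` (one of 'fresh', 'ddim', or 'fixed'; the 'fixed' mode makes the denoising loop keep a clone of the initial latents) and `time_scale`, defaulting to 1000. Below is a minimal usage sketch, assuming the class is importable as `sid.pipeline_sid_flux.SiDFluxPipeline`, that a SiD Flux checkpoint loads through the standard `DiffusionPipeline.from_pretrained` flow, and that `SiDPipelineOutput.images` holds decoded PIL images as in the stock Flux pipeline; the checkpoint path and dtype are placeholders, not part of this commit.

import torch
from sid.pipeline_sid_flux import SiDFluxPipeline

# Hypothetical checkpoint location; substitute a real SiD Flux checkpoint.
pipe = SiDFluxPipeline.from_pretrained(
    "path/to/sid-flux-checkpoint",
    torch_dtype=torch.bfloat16,
).to("cuda")

image = pipe(
    prompt="a photo of an astronaut riding a horse on the moon",
    noise_type="fresh",  # new in this commit: 'fresh', 'ddim', or 'fixed'
    time_scale=1000,     # new in this commit; 1000 is the default
).images[0]
image.save("sid_flux_sample.png")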
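For reference, the `D_x = latents - t.view(-1, 1, 1, 1) * flow_pred` line touched by the later hunks is the standard rectified-flow reconstruction of the clean latents: if x_t = (1 - t) * x_0 + t * eps and the transformer's flow prediction approximates the velocity v = eps - x_0, then x_0 = x_t - t * v. A small self-contained check of that identity follows (tensor names and shapes are illustrative only; in the pipeline `t` comes from the scheduler, while this toy uses a normalized time in [0, 1]).

import torch

x0 = torch.randn(2, 4, 8, 8)                    # stand-in for clean latents
eps = torch.randn_like(x0)                      # stand-in for Gaussian noise
t = torch.tensor([0.3, 0.7]).view(-1, 1, 1, 1)  # per-sample flow-matching time

x_t = (1 - t) * x0 + t * eps                    # noisy latents at time t
v = eps - x0                                    # ideal velocity / flow target
D_x = x_t - t * v                               # mirrors: latents - t.view(-1, 1, 1, 1) * flow_pred

assert torch.allclose(D_x, x0, atol=1e-6)       # reconstruction recovers x0 exactly for the ideal v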