small changes

app.py CHANGED
@@ -3,6 +3,7 @@ import gc
 from typing import List, Tuple, Dict
 import json
 import spaces
+import traceback
 
 import torch
 import gradio as gr
@@ -26,80 +27,103 @@ if HF_TOKEN:
 # -----------------------------
 # Avoid meta-tensor init from environment leftovers
 os.environ.pop("ACCELERATE_INIT_EMPTY_WEIGHTS", None)
-
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-print("Using device:", DEVICE)
-torch.backends.cudnn.benchmark = True
-
 PIPELINE = None
 
 # -----------------------------
 # Model / pipeline loading
 # -----------------------------
-@torch.no_grad()
-def load_pipeline_single_gpu() -> FluxKontextSliderPipeline:
-    global PIPELINE, DEVICE
-
-    pretrained = "black-forest-labs/FLUX.1-Kontext-dev"
-
-    n_slider_layers = 4
-    slider_projector_out_dim = 6144
-    trained_models_path = "./model_weights/"
-    is_clip_input = True
-
-    # Load transformer fully on CPU; avoid meta tensors
-    transformer = FluxTransformer2DModelwithSliderConditioning.from_pretrained(
-        pretrained,
-        subfolder="transformer",
-        device_map=None,
-        low_cpu_mem_usage=False,
-        token=HF_TOKEN,
-    )
-    weight_dtype = transformer.dtype  # keep checkpoint dtype
-    # … (slider projector construction; remaining lines unrecoverable)
-
-    # Load projector weights on CPU
-    slider_projector_path = os.path.join(trained_models_path, "slider_projector.pth")
-    state_dict = torch.load(slider_projector_path, map_location="cpu")
-    print("state_dict keys: {}".format(state_dict.keys()))
-
-    slider_projector.load_state_dict(state_dict)
-    print(f"loaded slider_projector from {slider_projector_path}")
-
-    # Build full pipeline on CPU; no device_map sharding
-    PIPELINE = FluxKontextSliderPipeline.from_pretrained(
-        pretrained,
-        transformer=transformer,
-        slider_projector=slider_projector,
-        torch_dtype=weight_dtype,
-        device_map=None,
-        low_cpu_mem_usage=False,
-    )
-
-    print("loading the pipeline lora weights from: {}".format(trained_models_path))
-    # … (LoRA loading; remaining lines unrecoverable)
-    print(f"[init] Pipeline loaded on {DEVICE}")
+def _log(msg): print(msg, flush=True)
+
+
+def load_pipeline_single_gpu():
+    global PIPELINE
+    if PIPELINE is not None:
+        _log("[worker] PIPELINE already initialized; skipping.")
+        return "warm"
+
+    try:
+        os.environ.pop("ACCELERATE_INIT_EMPTY_WEIGHTS", None)
+        token = os.environ.get("HF_TOKEN")
+        cuda_ok = torch.cuda.is_available()
+        _log(f"[worker] cuda available: {cuda_ok}")
+        if cuda_ok:
+            torch.backends.cudnn.benchmark = True
+
+        # ---------- config ----------
+        pretrained = "black-forest-labs/FLUX.1-Kontext-dev"
+        trained_models_path = "./model_weights/"
+        projector_path = os.path.join(trained_models_path, "slider_projector.pth")
+        offload_dir = "/tmp/offload"
+        os.makedirs(offload_dir, exist_ok=True)
+
+        if not os.path.isdir(trained_models_path):
+            return f"error: missing dir {trained_models_path}"
+        if not os.path.isfile(projector_path):
+            return f"error: missing projector weights at {projector_path}"
+
+        # dtype selection to cut memory
+        if cuda_ok and torch.cuda.get_device_capability(0)[0] >= 8:
+            dtype = torch.bfloat16
+        elif cuda_ok:
+            dtype = torch.float16
+        else:
+            dtype = torch.float32
+
+        max_memory = {"cuda": "80GiB", "cpu": "60GiB"}  # tune if needed
+
+        _log("[worker] loading transformer (sharded/offloaded)…")
+        transformer = FluxTransformer2DModelwithSliderConditioning.from_pretrained(
+            pretrained,
+            subfolder="transformer",
+            token=token,
+            trust_remote_code=True,
+            torch_dtype=dtype,
+            low_cpu_mem_usage=True,
+            # device_map="balanced_low_0",
+            offload_folder=offload_dir,
+            offload_state_dict=True,
+            # max_memory=max_memory,
+        )
+        weight_dtype = transformer.dtype
+        _log(f"[worker] transformer loaded, dtype={weight_dtype}")
+
+        _log("[worker] building slider projector…")
+        slider_projector = SliderProjector(out_dim=6144, pe_dim=2, n_layers=4, is_clip_input=True)
+        slider_projector.eval()
+        _log("[worker] loading projector weights…")
+        state_dict = torch.load(projector_path, map_location="cpu", weights_only=True)
+        slider_projector.load_state_dict(state_dict, strict=True)
+
+        _log("[worker] assembling pipeline (sharded/offloaded)…")
+        pipe = FluxKontextSliderPipeline.from_pretrained(
+            pretrained,
+            token=token,
+            trust_remote_code=True,
+            transformer=transformer,
+            slider_projector=slider_projector,
+            torch_dtype=weight_dtype,
+            low_cpu_mem_usage=True,
+            # device_map="balanced_low_0",
+            offload_folder=offload_dir,
+            offload_state_dict=True,
+            # max_memory=max_memory,
+        )
+        _log("[worker] pipeline assembled.")
+
+        _log(f"[worker] loading LoRA from: {trained_models_path}")
+        pipe.load_lora_weights(trained_models_path)
+        _log("[worker] LoRA loaded.")
+
+        # DO NOT pipe.to("cuda") here; keep auto device_map to avoid OOM
+        PIPELINE = pipe
+        if cuda_ok:
+            free, total = torch.cuda.mem_get_info()
+            _log(f"[worker] VRAM free/total: {free/1e9:.2f}/{total/1e9:.2f} GB")
+        _log("[worker] PIPELINE ready.")
+        return "ok"
+
+    except Exception:
+        _log("[worker] init exception:\n" + traceback.format_exc())
+        return "error"
 
 
 # -----------------------------
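The rewritten loader initializes lazily (returning "warm" on repeat calls), wraps every step in a try/except that logs the full traceback, and picks the cheapest safe dtype from the GPU's compute capability: bfloat16 on Ampere-class (sm_80+) cards, float16 on older CUDA devices, float32 on CPU. A minimal, self-contained sketch of that dtype selection (the helper name pick_dtype is illustrative, not from app.py):

    import torch

    def pick_dtype() -> torch.dtype:
        """Cheapest safe dtype for the current device."""
        if not torch.cuda.is_available():
            return torch.float32                    # CPU fallback: full precision
        major, _minor = torch.cuda.get_device_capability(0)
        # Ampere (sm_80) and newer have native bfloat16 support.
        return torch.bfloat16 if major >= 8 else torch.float16

    print(pick_dtype())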
@@ -287,23 +311,25 @@ def resize_image(img: Image.Image, target: int = 512) -> Image.Image:
     img = img.resize((new_w, new_h), resample)
     return img
 
-@spaces.GPU
-def _encode_prompt(prompt: str):
-    with torch.no_grad():
-        pe, ppe, _ = PIPELINE.encode_prompt(prompt, prompt_2=prompt)
-    return pe, ppe
-
-
 # -----------------------------
 # Inference functions
 # -----------------------------
 @spaces.GPU
 @torch.no_grad()
-def generate_image_stack_edits(text_prompt, n_edits, input_image, progress=gr.Progress()):
+def generate_image_stack_edits(text_prompt, n_edits, input_image):
     """
     Compute n_edits images on a single GPU for slider values in (0,1],
     return (list_of_images, first_image) so the UI shows immediately.
     """
+    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Initialize the pipeline lazily if it has not been created yet.
+    global PIPELINE
+    if PIPELINE is None:
+        status = load_pipeline_single_gpu()
+        print("loaded pipeline status: {}".format(status))
+
     if not input_image or not text_prompt or text_prompt.startswith("Please select"):
         return [], None
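On ZeroGPU Spaces the GPU is typically attached only while a function decorated with @spaces.GPU is running, which is why pipeline construction moved from import time into the request path. The underlying pattern is a module-level singleton built on first use; a stripped-down sketch, with a stand-in loader in place of the Space's real one:

    MODEL = None  # module-level singleton shared across requests

    def _expensive_load():
        # Stand-in for the real pipeline construction.
        return object()

    def get_model():
        """Build the model on the first call; reuse it afterwards."""
        global MODEL
        if MODEL is None:
            MODEL = _expensive_load()
        return MODEL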
@@ -312,7 +338,7 @@ def generate_image_stack_edits(text_prompt, n_edits, input_image, progress=gr.Progress()):
     slider_values = [(i + 1) / float(n) for i in range(n)]  # (0,1] inclusive
 
     img = resize_image(input_image, 512)
-    pe, ppe = _encode_prompt(text_prompt)
+    pe, ppe, _ = PIPELINE.encode_prompt(prompt=text_prompt, prompt_2=text_prompt)
 
     results: List[Image.Image] = []
     gen_base = 64  # deterministic seed base
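The prompt embeddings are now computed once, directly on the pipeline, and reused for all n_edits generations; only the slider conditioning changes per image. The slider grid itself is just n evenly spaced values in (0, 1], as in this small sketch:

    def slider_grid(n: int) -> list:
        """n evenly spaced slider values in (0, 1], endpoint included."""
        return [(i + 1) / float(n) for i in range(n)]

    print(slider_grid(4))  # [0.25, 0.5, 0.75, 1.0]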
@@ -350,14 +376,15 @@ def generate_image_stack_edits(text_prompt, n_edits, input_image, progress=gr.Progress()):
     first = results[0] if results else None
     return results, first
 
 @spaces.GPU
-def generate_single_image(text_prompt, slider_value, input_image, progress=gr.Progress()):
+def generate_single_image(text_prompt, slider_value, input_image):
     if not input_image or not text_prompt or text_prompt.startswith("Please select"):
         return None
 
     img = resize_image(input_image, 512)
     sv = float(slider_value)
     pe, ppe = _encode_prompt(text_prompt)
+    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
     gen = torch.Generator(device=DEVICE if DEVICE != "cpu" else "cpu").manual_seed(64)
     with torch.no_grad():
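Note that this hunk keeps the call to _encode_prompt even though an earlier hunk deletes that helper; presumably it should be inlined as PIPELINE.encode_prompt(...) here too, as was done in generate_image_stack_edits. The fixed seed keeps single-image edits reproducible; the seeded-generator idiom in isolation:

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # A fixed seed fixes the sampled noise, so the same inputs give the same output.
    gen = torch.Generator(device=device).manual_seed(64)
    print(torch.randn(2, generator=gen, device=device))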
@@ -492,7 +519,14 @@ def process_user_upload(uploaded_image, user_prompt, n_edits_val):
 
     return processed_image, generated_list, first_result, slider_update
 
+
+@spaces.GPU
+def gpu_warmup():
+    return load_pipeline_single_gpu()
+
 with gr.Blocks() as demo:
+    # warm up the pipeline once on page load so the first edit is fast
+    demo.load(gpu_warmup)
     gr.Markdown("# Kontinuous Kontext - Continuous Strength Control for Instruction-based Image Editing")
 
     # Add description section
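Gradio fires Blocks.load each time a browser session connects, so registering the @spaces.GPU-decorated warmup there builds the pipeline before the user's first edit request. A runnable miniature of the same wiring (without the spaces decorator, which only applies on Spaces hardware):

    import gradio as gr

    STATE = {"ready": False}

    def warmup():
        # Stand-in for load_pipeline_single_gpu(); repeat calls are cheap no-ops.
        STATE["ready"] = True

    with gr.Blocks() as demo:
        demo.load(warmup)  # runs on page load, before any user interaction
        gr.Markdown("pipeline warms up in the background")

    # demo.launch()  # uncomment to serve locally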