Spaces:

oxkitsune
/

rerun-ml-depth-pro

Running

App Files Files Community

oxkitsune committed on Oct 11, 2024

Commit

eec6e0c

1 Parent(s): afc0455

clean up logging

Browse files

Files changed (2) hide show

app.py +62 -42
get_pretrained_models.sh +1 -2

app.py CHANGED Viewed

@@ -49,13 +49,9 @@ def predict(frame):
 @rr.thread_local_stream("rerun_example_ml_depth_pro")
-def run_ml_depth_pro(frame):
     stream = rr.binary_stream()
-    assert model is not None, "Model is None"
-    assert transform is not None, "Transform is None"
-    assert frames is not None, "Frames is None"
     blueprint = rrb.Blueprint(
         rrb.Vertical(
             rrb.Spatial3DView(origin="/"),
@@ -69,38 +65,64 @@ def run_ml_depth_pro(frame):
         collapse_panels=True,
     )
     rr.send_blueprint(blueprint)
-    # for i, frame in enumerate(frames):
-    rr.set_time_sequence("frame", 0)
-    rr.log("world/camera/image", rr.Image(frame))
-    depth, focal_length = predict(frame)
-    rr.log(
-        "world/camera",
-        rr.Pinhole(
-            width=frame.shape[1],
-            height=frame.shape[0],
-            focal_length=focal_length,
-            principal_point=(frame.shape[1] / 2, frame.shape[0] / 2),
-            image_plane_distance=depth.max(),
-        ),
-    )
-    rr.log(
-        "world/camera/depth",
-        # need 0.19 stable for this
-        # rr.DepthImage(depth, meter=1, depth_range=(depth.min(), depth.max())),
-        rr.DepthImage(depth, meter=1),
-    )
-    yield stream.read()
-video_path = Path("hd-cat.mp4")
 # Load video
 frames = []
@@ -114,21 +136,19 @@ while True:
     frames.append(frame)
 with gr.Blocks() as demo:
-    with gr.Tab("Streaming"):
-        with gr.Row():
-            img = gr.Image(interactive=True, label="Image")
-            with gr.Column():
-                stream_ml_depth_pro = gr.Button("Stream Ml Depth Pro")
-        with gr.Row():
-            viewer = Rerun(
-                streaming=True,
-                panel_states={
-                    "time": "collapsed",
-                    "blueprint": "hidden",
-                    "selection": "hidden",
-                },
-            )
-        stream_ml_depth_pro.click(run_ml_depth_pro, inputs=[img], outputs=[viewer])
 if __name__ == "__main__":

 @rr.thread_local_stream("rerun_example_ml_depth_pro")
+def run_rerun(path_to_video):
     stream = rr.binary_stream()
     blueprint = rrb.Blueprint(
         rrb.Vertical(
             rrb.Spatial3DView(origin="/"),
         collapse_panels=True,
     )
     rr.send_blueprint(blueprint)
+    yield stream.read()
+    print("Loading video from", path_to_video)
+    video = cv2.VideoCapture(path_to_video)
+    frame_idx = 0
+    while True:
+        read, frame = video.read()
+        if not read:
+            break
+        frame = cv2.resize(frame, (320, 240))
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        rr.set_time_sequence("frame", frame_idx)
+        rr.log("world/camera/image", rr.Image(frame))
+        yield stream.read()
+        image = transform(frame)
+        depth, focal_length = estimate_depth(image)
+        rr.log(
+            "world/camera",
+            rr.Pinhole(
+                width=frame.shape[1],
+                height=frame.shape[0],
+                focal_length=focal_length,
+                principal_point=(frame.shape[1] / 2, frame.shape[0] / 2),
+                image_plane_distance=depth.max(),
+            ),
+        )
+        rr.log(
+            "world/camera/depth",
+            # need 0.19 stable for this
+            # rr.DepthImage(depth, meter=1, depth_range=(depth.min(), depth.max())),
+            rr.DepthImage(depth, meter=1),
+        )
+        yield stream.read()
+@spaces.GPU(duration=20)
+def estimate_depth(image):
+    prediction = model.infer(image)
+    depth = prediction["depth"].squeeze().detach().cpu().numpy()
+    focal_length = prediction["focallength_px"].item()
+    return depth, focal_length
+video_path = Path("hd-cat.mp4")
 # Load video
 frames = []
     frames.append(frame)
 with gr.Blocks() as demo:
+    video = gr.Video(interactive=True, label="Video")
+    visualize = gr.Button("Visualize ML Depth Pro")
+    with gr.Row():
+        viewer = Rerun(
+            streaming=True,
+            panel_states={
+                "time": "collapsed",
+                "blueprint": "hidden",
+                "selection": "hidden",
+            },
+        )
+        visualize.click(run_rerun, inputs=[video], outputs=[viewer])
 if __name__ == "__main__":

get_pretrained_models.sh CHANGED Viewed

@@ -4,5 +4,4 @@
 # Copyright (C) 2024 Apple Inc. All Rights Reserved.
 #
 mkdir -p checkpoints
-# Place final weights here:
-wget https://ml-site.cdn-apple.com/models/depth-pro/depth_pro.pt -P checkpoints

 # Copyright (C) 2024 Apple Inc. All Rights Reserved.
 #
 mkdir -p checkpoints
+wget -q https://ml-site.cdn-apple.com/models/depth-pro/depth_pro.pt -P checkpoints