Spaces:

K00B404
/

Llava_caption

Runtime error

App Files Files Community

K00B404 committed on Feb 26

Commit

ae61a84

verified ·

1 Parent(s): 7fd61d2

Create app.py

Browse files

Files changed (1) hide show

app.py +91 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import gradio as gr
+from transformers import pipeline
+import requests
+from PIL import Image
+from io import BytesIO
+# Initialize the image-to-text pipeline
+# Process-wide pipeline instance; stays None until load_model() is first called.
+_PIPELINE = None
+def load_model():
+    """Return the LLaVA image-text-to-text pipeline, building it on first use.
+
+    The pipeline is created once and kept in a module-level variable, so
+    repeated calls hand back the same object instead of constructing a new
+    pipeline for the same checkpoint each time.
+
+    Returns:
+        The object returned by ``pipeline("image-text-to-text", ...)`` for the
+        ``llava-hf/llava-1.5-7b-hf`` checkpoint.
+    """
+    global _PIPELINE
+    if _PIPELINE is None:
+        _PIPELINE = pipeline("image-text-to-text", model="llava-hf/llava-1.5-7b-hf")
+    return _PIPELINE
+# Function to handle image captioning
+def caption_image(image, question=None):
+    """Describe *image*, or answer *question* about it, using the LLaVA pipeline.
+
+    Args:
+        image: The picture to caption. The UI in this file supplies a PIL image.
+        question: Optional prompt about the image. ``None`` or a blank string
+            falls back to a generic request for a detailed description.
+
+    Returns:
+        The text generated by the model for the prompt.
+
+    Raises:
+        gr.Error: If no image was provided.
+    """
+    # Fail early with a readable message instead of an opaque pipeline error.
+    if image is None:
+        raise gr.Error("Please upload an image first.")
+    prompt = question.strip() if question else ""
+    if not prompt:
+        prompt = "Describe this image in detail."
+    # One message template serves both cases; only the text prompt differs.
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                # An in-memory image goes under "image"; "url" is meant for links.
+                {"type": "image", "image": image},
+                {"type": "text", "text": prompt},
+            ],
+        },
+    ]
+    pipe = load_model()
+    # Ask only for the newly generated text; the default output for chat input
+    # is the full conversation rather than the answer alone.
+    result = pipe(text=messages, max_new_tokens=150, return_full_text=False)
+    generated = result[0]["generated_text"]
+    # Defensive: if a conversation list comes back anyway, keep the last reply.
+    if isinstance(generated, list):
+        generated = generated[-1]["content"]
+    return generated
+# Function to handle example images via URL
+def process_example_url(url):
+    """Download the image at *url* and return it as a PIL image.
+
+    Args:
+        url: HTTP(S) address of the image to fetch.
+
+    Returns:
+        The image opened from the downloaded bytes.
+
+    Raises:
+        requests.HTTPError: If the server responds with an error status.
+        requests.Timeout: If the server does not answer within 30 seconds.
+    """
+    # Bound the wait so a stalled host cannot hang the caller indefinitely.
+    response = requests.get(url, timeout=30)
+    # Surface 4xx/5xx responses instead of feeding an error page to the decoder.
+    response.raise_for_status()
+    return Image.open(BytesIO(response.content))
+# Create Gradio interface
+with gr.Blocks(title="Image Captioning App") as demo:
+    gr.Markdown("# Image Captioning with LLaVA")
+    gr.Markdown("Upload an image and optionally ask a specific question about it.")
+    with gr.Row():
+        # Left column: inputs and the trigger button.
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="Upload Image")
+            question_input = gr.Textbox(label="Question (optional)", placeholder="Ask a specific question about the image or leave blank for general description")
+            caption_button = gr.Button("Generate Caption")
+        # Right column: the generated text.
+        with gr.Column():
+            caption_output = gr.Textbox(label="Generated Caption", lines=7)
+    # Example rows are [image URL, question]; an empty question requests a
+    # general description.
+    example_images = [
+        ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
+         "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"],
+        ["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1920px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
+         ""],
+    ]
+    # Caching is switched off explicitly: it requires both `fn` and `outputs`,
+    # and would run the model on every example while the app starts up.
+    # The examples use the same handler and output as the button.
+    gr.Examples(
+        examples=example_images,
+        inputs=[image_input, question_input],
+        outputs=caption_output,
+        fn=caption_image,
+        cache_examples=False,
+    )
+    # Set up the button click event
+    caption_button.click(
+        fn=caption_image,
+        inputs=[image_input, question_input],
+        outputs=caption_output,
+    )
+    gr.Markdown("### How to use:")
+    gr.Markdown("1. Upload an image by clicking the upload box or drag-and-drop")
+    gr.Markdown("2. Optionally type a specific question about the image")
+    gr.Markdown("3. Click 'Generate Caption' to get the result")
+    # The examples are rendered before this help text, i.e. above it.
+    gr.Markdown("4. Try the examples above to see how it works")
+# Start the server only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()