K00B404 commited on
Commit
ae61a84
·
verified ·
1 Parent(s): 7fd61d2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import requests
4
+ from PIL import Image
5
+ from io import BytesIO
6
+
# Lazily build and memoize the image-to-text pipeline.
def load_model():
    """Return the shared LLaVA image-text-to-text pipeline.

    The pipeline is created on first use and cached on the function
    object, so repeated calls (one per caption request) do not reload
    the multi-GB llava-1.5-7b checkpoint each time.

    Returns:
        The transformers ``image-text-to-text`` pipeline instance.
    """
    # Cache on the function itself to avoid a module-level global and
    # to keep the original call sites (`pipe = load_model()`) working.
    if not hasattr(load_model, "_pipe"):
        load_model._pipe = pipeline(
            "image-text-to-text", model="llava-hf/llava-1.5-7b-hf"
        )
    return load_model._pipe
# Handle image captioning / visual question answering for one image.
def caption_image(image, question=None):
    """Generate a caption for *image*, or answer *question* about it.

    Args:
        image: PIL image from the Gradio image component (``type="pil"``),
            or ``None`` when the user clicked without uploading.
        question: Optional free-text question; when empty/blank, a generic
            "describe this image" prompt is used instead.

    Returns:
        The model's generated text for the prompt, or a short hint string
        when no image was provided.
    """
    # Guard: the button is clickable before an image is uploaded.
    if image is None:
        return "Please upload an image first."

    pipe = load_model()

    # Collapse the original duplicated branches: either the user's
    # question or a generic description prompt.
    if question and question.strip():
        prompt = question.strip()
    else:
        prompt = "Describe this image in detail."

    # In the transformers chat format, in-memory PIL images go under the
    # "image" key; "url" is reserved for remote URLs. The original passed
    # the PIL object as "url".
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Generate the caption; 150 new tokens is enough for a detailed
    # single-paragraph description.
    result = pipe(text=messages, max_new_tokens=150)
    return result[0]["generated_text"]
# Fetch an example image from a URL and decode it into a PIL image.
def process_example_url(url):
    """Download *url* and return it as a PIL ``Image``.

    Args:
        url: HTTP(S) URL of an image.

    Returns:
        The decoded ``PIL.Image.Image``.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the download exceeds the timeout.
    """
    # Timeout prevents the app from hanging forever on a dead host;
    # raise_for_status avoids handing an HTML error page to Image.open.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
# Build the Gradio interface (module-level Blocks definition).
with gr.Blocks(title="Image Captioning App") as demo:
    gr.Markdown("# Image Captioning with LLaVA")
    gr.Markdown("Upload an image and optionally ask a specific question about it.")

    with gr.Row():
        with gr.Column():
            # Input side: image + optional question + trigger button.
            image_input = gr.Image(type="pil", label="Upload Image")
            question_input = gr.Textbox(
                label="Question (optional)",
                placeholder="Ask a specific question about the image or leave blank for general description",
            )
            caption_button = gr.Button("Generate Caption")

        with gr.Column():
            # Output side: the generated caption / answer.
            caption_output = gr.Textbox(label="Generated Caption", lines=7)

    # Example (image URL, question) pairs shown below the inputs.
    example_images = [
        ["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
         "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"],
        ["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/1920px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
         ""]
    ]

    # NOTE(review): the original passed fn=process_example_url with
    # cache_examples=True but no `outputs`, which gr.Examples rejects at
    # startup. Gradio loads example URLs into the components directly, so
    # no fn/caching is needed here.
    gr.Examples(
        examples=example_images,
        inputs=[image_input, question_input],
    )

    # Wire the button to the captioning function.
    caption_button.click(
        fn=caption_image,
        inputs=[image_input, question_input],
        outputs=caption_output,
    )

    gr.Markdown("### How to use:")
    gr.Markdown("1. Upload an image by clicking the upload box or drag-and-drop")
    gr.Markdown("2. Optionally type a specific question about the image")
    gr.Markdown("3. Click 'Generate Caption' to get the result")
    gr.Markdown("4. Try the examples below to see how it works")

# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()