Spaces:

adil9858
/

dalton_vision

Sleeping

App Files Files Community

adil9858 committed on May 2

Commit

4b25820

verified ·

1 Parent(s): 3d1d7d0

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -113

app.py CHANGED Viewed

@@ -3,22 +3,17 @@ from openai import OpenAI
 import base64
 from PIL import Image
 import io
-import os
-import time
-# Initialize OpenAI client
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key='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'
 )
-def capture_image():
-    # This will trigger the camera capture in the frontend
-    return None
 def analyze_image(image, prompt):
     if image is None:
-        return "Please capture or upload an image first."
     # Convert image to base64
     buffered = io.BytesIO()
@@ -31,7 +26,13 @@ def analyze_image(image, prompt):
             messages=[
                 {
                     "role": "system",
-                    "content": """You are an expert AI assistant specialized in image understanding."""
                 },
                 {
                     "role": "user",
@@ -48,126 +49,91 @@ def analyze_image(image, prompt):
             ],
             max_tokens=2048
         )
-        return response.choices[0].message.content
     except Exception as e:
-        return f"Error: {str(e)}"
 css = """
-#camera-container {
-    position: relative;
-    width: 100%;
-    margin: 0 auto;
-}
-#camera-input {
-    width: 100% !important;
-    min-height: 300px;
-}
-#capture-btn {
-    position: absolute;
-    bottom: 20px;
-    left: 50%;
-    transform: translateX(-50%);
-    z-index: 100;
-    background: white;
-    border-radius: 50%;
-    width: 60px;
-    height: 60px;
-    border: 3px solid #4a6cf7;
-    cursor: pointer;
-}
-#capture-btn:active {
-    transform: translateX(-50%) scale(0.95);
 }
-.mobile-controls {
-    display: flex;
-    gap: 10px;
-    margin-top: 10px;
-    justify-content: center;
-}
-@media (min-width: 768px) {
-    #camera-container {
-        max-width: 500px;
-    }
 }
 """
-with gr.Blocks(css=css, title="DaltonVision Camera") as demo:
     gr.Markdown("""
-    # 📸 DaltonVision - Camera Mode
-    ### Take pictures directly in the app for analysis
     """)
-    with gr.Column():
-        with gr.Row():
-            with gr.Column(elem_id="camera-container"):
-                # Camera component
-                camera = gr.Image(
-                    sources=["webcam"],
-                    type="pil",
-                    label="Camera Preview",
-                    elem_id="camera-input",
-                    interactive=False,
-                    mirror_webcam=False
-                )
-                # Hidden button that triggers capture
-                capture_trigger = gr.Button("Capture", visible=False)
-                # Custom capture button
-                with gr.Row(elem_classes="mobile-controls"):
-                    gr.HTML("""
-                    <div id="capture-btn" onclick="document.querySelector('#capture-btn-hidden').click()"></div>
-                    """)
-                    capture_btn = gr.Button("Capture Photo", elem_id="capture-btn-hidden", visible=False)
-                    flip_btn = gr.Button("🔄 Flip Camera")
-                    reset_btn = gr.Button("❌ Reset")
-                # Upload fallback
-                upload = gr.UploadButton("📁 Upload Instead", file_types=["image"])
-        prompt = gr.Textbox(
-            label="Ask about the image",
-            placeholder="What would you like to know about this image?",
-            lines=3
-        )
-        submit_btn = gr.Button("Analyze Image", variant="primary")
-        output = gr.Textbox(
-            label="Analysis Results",
-            interactive=False,
-            lines=10,
-            show_copy_button=True
-        )
-    # Event handlers
-    capture_btn.click(
-        capture_image,
-        outputs=camera
-    )
-    flip_btn.click(
-        None,
-        _js="() => { document.querySelector('video').style.transform = document.querySelector('video').style.transform === 'scaleX(-1)' ? 'scaleX(1)' : 'scaleX(-1)'; }"
-    )
-    reset_btn.click(
-        lambda: (None, ""),
-        outputs=[camera, output]
-    )
-    upload.upload(
-        lambda file: Image.open(file.name),
-        inputs=upload,
-        outputs=camera
-    )
     submit_btn.click(
-        analyze_image,
-        inputs=[camera, prompt],
-        outputs=output
     )
 if __name__ == "__main__":
     demo.launch()

 import base64
 from PIL import Image
 import io
+from datetime import datetime
+# OpenAI client setup
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key='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'
 )
 def analyze_image(image, prompt):
     if image is None:
+        return "Please upload or capture an image first."
     # Convert image to base64
     buffered = io.BytesIO()
             messages=[
                 {
                     "role": "system",
+                    "content": """You are Dalton, an expert AI assistant specialized in image understanding.
+                    Your tasks include:
+                    - Extracting and structuring text from images
+                    - Answering questions about image content
+                    - Providing detailed descriptions
+                    - Analyzing receipts, documents, and other visual content
+                    Be thorough, accurate, and helpful in your responses."""
                 },
                 {
                     "role": "user",
             ],
             max_tokens=2048
         )
+        result = response.choices[0].message.content
+        return result
     except Exception as e:
+        return f"An error occurred: {str(e)}"
+# Custom CSS for better mobile experience
 css = """
+#mobile-camera { width: 100% !important; }
+#prompt-textbox { min-height: 100px !important; }
+.result-box {
+    max-height: 500px;
+    overflow-y: auto;
+    padding: 15px;
+    border: 1px solid #e0e0e0;
+    border-radius: 8px;
 }
+.footer {
+    margin-top: 20px;
+    font-size: 12px;
+    color: #666;
+    text-align: center;
 }
 """
+with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
     gr.Markdown("""
+    # 🧾 DaltonVision - InternVL3-14B
+    ### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
     """)
+    with gr.Row():
+        with gr.Column():
+            # Image input section
+            image_input = gr.Image(
+                sources=["upload", "webcam"],
+                type="pil",
+                label="Upload or Capture Image",
+                elem_id="mobile-camera"
+            )
+            # Prompt input
+            prompt_input = gr.Textbox(
+                label="📝 Enter your question or instruction",
+                value="Extract all content structurally",
+                lines=3,
+                elem_id="prompt-textbox"
+            )
+            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
+            gr.Examples(
+                examples=[
+                    ["What is the total amount on this receipt?"],
+                    ["List all items and their prices"],
+                    ["Who is the vendor and what is the date?"],
+                    ["Describe this image in detail"]
+                ],
+                inputs=[prompt_input],
+                label="💡 Try these example prompts:"
+            )
+        with gr.Column():
+            # Result output
+            result_output = gr.Markdown(
+                label="✅ Analysis Result",
+                elem_classes="result-box"
+            )
+    # Footer
+    gr.Markdown("""
+    <div class="footer">
+    © 2025 Koshur AI. All rights reserved.<br>
+    Note: Images are processed in real-time and not stored.
+    </div>
+    """)
+    # Button action
     submit_btn.click(
+        fn=analyze_image,
+        inputs=[image_input, prompt_input],
+        outputs=result_output
     )
+# Launch the app
 if __name__ == "__main__":
     demo.launch()