File size: 5,111 Bytes
e72c8ca
4d13530
7eddc37
e72c8ca
 
 
4d13530
e72c8ca
 
7eddc37
e72c8ca
7eddc37
fb5b735
c4592de
 
2c72d2c
7eddc37
 
 
 
 
24de656
 
12579ea
2c72d2c
7eddc37
24de656
2c72d2c
 
4d13530
fb5b735
 
12579ea
24de656
e72c8ca
 
 
 
 
 
fb5b735
e72c8ca
24de656
 
 
 
 
 
 
e72c8ca
 
 
24de656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e72c8ca
 
 
 
 
24de656
 
2c72d2c
 
24de656
 
 
 
 
e72c8ca
 
24de656
 
 
 
 
2c72d2c
e72c8ca
2c72d2c
e72c8ca
 
 
24de656
 
 
 
 
 
e72c8ca
24de656
 
e72c8ca
 
 
 
 
 
 
24de656
e72c8ca
 
 
24de656
e72c8ca
24de656
 
 
e72c8ca
 
 
 
24de656
 
e72c8ca
24de656
 
 
e72c8ca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import io

import torch
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse
from PIL import Image
from transformers import AutoProcessor, Florence2ForConditionalGeneration # <--- DIRECT IMPORT

app = FastAPI()

print("⏳ Initializing Florence-2 (Hardcoded Class Mode)...")

# We use the community fork for clean config
model_id = "florence-community/Florence-2-large"
device = "cpu"

try:
    # 1. Load Processor
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=False)

    # 2. Load Model using the SPECIFIC CLASS (No "AutoModel" guessing)
    model = Florence2ForConditionalGeneration.from_pretrained(
        model_id, 
        trust_remote_code=False, 
        torch_dtype=torch.float32
    ).to(device)
    
    print("βœ… Model Loaded Successfully!")

except Exception as e:
    print(f"❌ Load Error: {e}")
    model = None
    processor = None

# --- UI ---
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Naman's AI Vision</title>
    <style>
        body { font-family: sans-serif; background: #0f172a; color: white; display: flex; flex-direction: column; align-items: center; min-height: 100vh; margin: 0; padding: 20px; }
        .box { background: #1e293b; padding: 30px; border-radius: 15px; width: 100%; max-width: 600px; text-align: center; border: 1px solid #334155; }
        h1 { margin-top: 0; color: #38bdf8; }
        button { background: #38bdf8; color: #000; border: none; padding: 10px 20px; border-radius: 5px; font-weight: bold; cursor: pointer; margin-top: 10px; }
        button:disabled { opacity: 0.5; }
        #result { margin-top: 20px; white-space: pre-wrap; text-align: left; background: #000; padding: 15px; border-radius: 5px; font-family: monospace; display: none; }
        img { max-width: 100%; border-radius: 10px; margin-top: 10px; display: none; }
    </style>
</head>
<body>
    <div class="box">
        <h1>πŸ‘οΈ Florence-2 Vision AI</h1>
        <p>Advanced OCR & Image Understanding (CPU)</p>
        
        <input type="file" id="file" accept="image/*" style="display: none;">
        <button onclick="document.getElementById('file').click()">πŸ“‚ Upload Image</button>
        
        <br><br>
        <select id="task" style="padding: 10px; border-radius: 5px;">
            <option value="<OCR>">πŸ“„ Read Text (OCR)</option>
            <option value="<CAPTION>">πŸ–ΌοΈ Describe Image</option>
            <option value="<OD>">πŸ“¦ Detect Objects</option>
        </select>
        <button onclick="runAI()" id="runBtn">Run AI</button>

        <img id="preview">
        <div id="result"></div>
    </div>

    <script>
        const fileInput = document.getElementById('file');
        const preview = document.getElementById('preview');
        const result = document.getElementById('result');
        const runBtn = document.getElementById('runBtn');
        let currentFile = null;

        fileInput.addEventListener('change', (e) => {
            currentFile = e.target.files[0];
            preview.src = URL.createObjectURL(currentFile);
            preview.style.display = 'block';
            result.style.display = 'none';
        });

        async function runAI() {
            if (!currentFile) return alert("Select an image first!");
            runBtn.innerText = "Processing...";
            runBtn.disabled = true;
            result.style.display = 'none';

            const formData = new FormData();
            formData.append('file', currentFile);
            formData.append('task_prompt', document.getElementById('task').value);

            try {
                const res = await fetch('/analyze', { method: 'POST', body: formData });
                const data = await res.json();
                result.innerText = data.result || "Error: " + JSON.stringify(data);
                result.style.display = 'block';
            } catch (e) {
                alert("Error: " + e);
            }
            runBtn.innerText = "Run AI";
            runBtn.disabled = false;
        }
    </script>
</body>
</html>
"""

@app.get("/", response_class=HTMLResponse)
def home(): return html_content

@app.post("/analyze")
async def analyze(task_prompt: str = "<OCR>", file: UploadFile = File(...)):
    if not model: return {"error": "Model failed to load"}
    try:
        img = Image.open(io.BytesIO(await file.read())).convert("RGB")
        inputs = processor(text=task_prompt, images=img, return_tensors="pt").to(device)
        
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,
            do_sample=False
        )
        text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
        parsed = processor.post_process_generation(text, task=task_prompt, image_size=img.size)
        return {"result": str(parsed)}
    except Exception as e:
        return {"error": str(e)}