Spaces:
Sleeping
Sleeping
File size: 5,111 Bytes
e72c8ca 4d13530 7eddc37 e72c8ca 4d13530 e72c8ca 7eddc37 e72c8ca 7eddc37 fb5b735 c4592de 2c72d2c 7eddc37 24de656 12579ea 2c72d2c 7eddc37 24de656 2c72d2c 4d13530 fb5b735 12579ea 24de656 e72c8ca fb5b735 e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 2c72d2c 24de656 e72c8ca 24de656 2c72d2c e72c8ca 2c72d2c e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 e72c8ca 24de656 e72c8ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import io

import torch
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse
from PIL import Image
from transformers import AutoProcessor, Florence2ForConditionalGeneration  # direct class import
app = FastAPI()

print("⏳ Initializing Florence-2 (Hardcoded Class Mode)...")

# The community fork ships a clean config, so trust_remote_code can stay off.
model_id = "florence-community/Florence-2-large"
device = "cpu"

try:
    # 1. Load the processor (tokenizer + image preprocessing).
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=False)
    # 2. Load the model via its concrete class -- no AutoModel guessing.
    model = Florence2ForConditionalGeneration.from_pretrained(
        model_id,
        trust_remote_code=False,
        torch_dtype=torch.float32,  # full precision: running on CPU
    ).to(device)
    print("✅ Model Loaded Successfully!")
except Exception as e:
    # Keep the app importable even if the download/load fails; the endpoints
    # check for None and report the failure instead of crashing at startup.
    print(f"❌ Load Error: {e}")
    model = None
    processor = None
# --- UI ---
# Single-page frontend served from "/": upload an image, pick a Florence-2
# task token, POST it as multipart form data to /analyze, and render the
# JSON result. Kept inline so the Space is a single self-contained file.
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Naman's AI Vision</title>
<style>
body { font-family: sans-serif; background: #0f172a; color: white; display: flex; flex-direction: column; align-items: center; min-height: 100vh; margin: 0; padding: 20px; }
.box { background: #1e293b; padding: 30px; border-radius: 15px; width: 100%; max-width: 600px; text-align: center; border: 1px solid #334155; }
h1 { margin-top: 0; color: #38bdf8; }
button { background: #38bdf8; color: #000; border: none; padding: 10px 20px; border-radius: 5px; font-weight: bold; cursor: pointer; margin-top: 10px; }
button:disabled { opacity: 0.5; }
#result { margin-top: 20px; white-space: pre-wrap; text-align: left; background: #000; padding: 15px; border-radius: 5px; font-family: monospace; display: none; }
img { max-width: 100%; border-radius: 10px; margin-top: 10px; display: none; }
</style>
</head>
<body>
<div class="box">
<h1>ποΈ Florence-2 Vision AI</h1>
<p>Advanced OCR & Image Understanding (CPU)</p>
<input type="file" id="file" accept="image/*" style="display: none;">
<button onclick="document.getElementById('file').click()">π Upload Image</button>
<br><br>
<select id="task" style="padding: 10px; border-radius: 5px;">
<option value="<OCR>">π Read Text (OCR)</option>
<option value="<CAPTION>">πΌοΈ Describe Image</option>
<option value="<OD>">π¦ Detect Objects</option>
</select>
<button onclick="runAI()" id="runBtn">Run AI</button>
<img id="preview">
<div id="result"></div>
</div>
<script>
const fileInput = document.getElementById('file');
const preview = document.getElementById('preview');
const result = document.getElementById('result');
const runBtn = document.getElementById('runBtn');
let currentFile = null;
fileInput.addEventListener('change', (e) => {
currentFile = e.target.files[0];
preview.src = URL.createObjectURL(currentFile);
preview.style.display = 'block';
result.style.display = 'none';
});
async function runAI() {
if (!currentFile) return alert("Select an image first!");
runBtn.innerText = "Processing...";
runBtn.disabled = true;
result.style.display = 'none';
const formData = new FormData();
formData.append('file', currentFile);
formData.append('task_prompt', document.getElementById('task').value);
try {
const res = await fetch('/analyze', { method: 'POST', body: formData });
const data = await res.json();
result.innerText = data.result || "Error: " + JSON.stringify(data);
result.style.display = 'block';
} catch (e) {
alert("Error: " + e);
}
runBtn.innerText = "Run AI";
runBtn.disabled = false;
}
</script>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
def home():
    """Serve the inline single-page UI."""
    return html_content
@app.post("/analyze")
async def analyze(task_prompt: str = Form("<OCR>"), file: UploadFile = File(...)):
    """Run a Florence-2 task on an uploaded image.

    task_prompt: Florence-2 task token (<OCR>, <CAPTION>, <OD>, ...).
        Declared with Form(...) so the multipart field posted by the UI is
        actually read -- a plain `str` default makes FastAPI treat it as a
        query parameter, silently ignoring the form value and always
        running <OCR>.
    file: the uploaded image file.
    Returns {"result": ...} on success or {"error": ...} on failure.
    """
    if model is None or processor is None:
        return {"error": "Model failed to load"}
    try:
        img = Image.open(io.BytesIO(await file.read())).convert("RGB")
        inputs = processor(text=task_prompt, images=img, return_tensors="pt").to(device)
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,       # beam search: deterministic, better OCR quality
            do_sample=False,
        )
        # Keep special tokens: post_process_generation needs them to parse
        # the task-specific output format (boxes, labels, raw text).
        text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
        parsed = processor.post_process_generation(
            text, task=task_prompt, image_size=img.size
        )
        return {"result": str(parsed)}
    except Exception as e:
        # Surface the failure as JSON so the UI can display it,
        # rather than returning an opaque 500.
        return {"error": str(e)}