"""Minimal FastAPI app exposing a Florence-2 vision model (OCR / captioning) on CPU.

Serves a single-page HTML UI at ``/`` and an inference endpoint at ``/analyze``
that accepts an uploaded image plus a Florence-2 task prompt (e.g. ``<OCR>``).
"""

import io

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse
from PIL import Image
from transformers import AutoProcessor, Florence2ForConditionalGeneration  # direct class import
import torch

app = FastAPI()

# The community fork ships a clean config, so the checkpoint loads with
# trust_remote_code=False and the concrete model class (no AutoModel guessing).
MODEL_ID = "florence-community/Florence-2-large"
DEVICE = "cpu"

print("⏳ Initializing Florence-2 (Hardcoded Class Mode)...")

try:
    # 1. Load the processor (tokenizer + image preprocessor).
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=False)
    # 2. Load the model with the SPECIFIC class — full float32 precision on CPU.
    model = Florence2ForConditionalGeneration.from_pretrained(
        MODEL_ID,
        trust_remote_code=False,
        torch_dtype=torch.float32,
    ).to(DEVICE)
    print("✅ Model Loaded Successfully!")
except Exception as e:  # broad by design: any load failure puts the app in degraded mode
    print(f"❌ Load Error: {e}")
    model = None
    processor = None

# --- UI ---
# NOTE(review): the original HTML literal was garbled (all markup stripped by
# extraction); this is a reconstruction that preserves the visible text and
# posts to /analyze the way the endpoint signature expects (task_prompt as a
# query parameter, image as multipart form data).
html_content = """<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Naman's AI Vision</title>
</head>
<body>
  <h1>👁️ Florence-2 Vision AI</h1>
  <p>Advanced OCR &amp; Image Understanding (CPU)</p>
  <form id="form">
    <input type="text" name="task_prompt" placeholder="Task prompt, e.g. &lt;OCR&gt;">
    <input type="file" name="file" accept="image/*" required>
    <button type="submit">Analyze</button>
  </form>
  <pre id="result"></pre>
  <script>
    document.getElementById("form").addEventListener("submit", async (e) => {
      e.preventDefault();
      const data = new FormData(e.target);
      const prompt = data.get("task_prompt") || "";
      data.delete("task_prompt");
      const res = await fetch(
        "/analyze?task_prompt=" + encodeURIComponent(prompt),
        { method: "POST", body: data }
      );
      document.getElementById("result").textContent =
        JSON.stringify(await res.json(), null, 2);
    });
  </script>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
def home() -> str:
    """Serve the single-page UI."""
    return html_content


@app.post("/analyze")
async def analyze(task_prompt: str = "", file: UploadFile = File(...)):
    """Run a Florence-2 task on an uploaded image.

    Args:
        task_prompt: Florence-2 task token (query parameter), e.g. ``<OCR>``
            or ``<CAPTION>``.
        file: uploaded image file (any format Pillow can decode).

    Returns:
        ``{"result": ...}`` with the post-processed generation on success,
        or ``{"error": ...}`` if the model failed to load or inference failed.
    """
    # Explicit None checks: the handler dereferences both globals, and
    # nn.Module truthiness should not be relied on.
    if model is None or processor is None:
        return {"error": "Model failed to load"}
    try:
        img = Image.open(io.BytesIO(await file.read())).convert("RGB")
        inputs = processor(text=task_prompt, images=img, return_tensors="pt").to(DEVICE)
        # Deterministic beam search; 1024 new tokens is enough for dense OCR.
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,
            do_sample=False,
        )
        # Keep special tokens: post_process_generation parses the task markers.
        raw = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
        parsed = processor.post_process_generation(raw, task=task_prompt, image_size=img.size)
        return {"result": str(parsed)}
    except Exception as e:  # surface any inference failure to the client as JSON
        return {"error": str(e)}