"""Gradio app that extracts text from an uploaded PDF or image with MinerU2.5."""

import os

import fitz  # PyMuPDF, used to rasterize PDF pages
import gradio as gr
from mineru_vl_utils.mineru_client import MinerUClient
from PIL import Image

# The client is created once at import time and reused by every request.
model_path = "opendatalab/MinerU2.5-2509-1.2B"
client = MinerUClient(
    backend="transformers",
    model_path=model_path,
    device="cuda",  # GPU is required
)


def _block_text(block):
    """Return the text carried by one extracted block, or None if it has none.

    ``text`` is tried first (the field this script originally read), then
    ``content``, each as an attribute and, for dict-like blocks, as a key.
    NOTE(review): MinerU blocks appear to expose their text as ``content``
    rather than ``text`` -- confirm against the installed mineru_vl_utils.
    """
    for key in ("text", "content"):
        value = getattr(block, key, None)
        if value is None and isinstance(block, dict):
            value = block.get(key)
        if isinstance(value, str):
            return value
    return None


def _load_images(path, progress):
    """Return the pages of *path* as a list of RGB PIL images.

    A ``.pdf`` file yields one image per page; anything else is opened as a
    single image.
    """
    ext = os.path.splitext(path)[-1].lower()
    if ext != ".pdf":
        # convert() forces the pixel data to be read, so the file handle can
        # be released right away; RGB matches what the PDF branch produces.
        with Image.open(path) as img:
            return [img.convert("RGB")]

    images = []
    # The context manager closes the document even if a page fails to render.
    with fitz.open(path) as doc:
        total_pages = len(doc)
        for i, page in enumerate(doc):
            pix = page.get_pixmap()
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
            progress((i+1)/total_pages, desc=f"Conversion page {i+1}/{total_pages}")
    return images


def extract_from_file(file, progress=gr.Progress()):
    """Extract text from an uploaded PDF or image.

    Args:
        file: The uploaded file, either a filesystem path or an object whose
            ``name`` attribute is that path.
        progress: Gradio progress tracker used to report conversion and
            extraction progress.

    Returns:
        The text of every page, with pages separated by a ``--- PAGE ---``
        marker line.

    Raises:
        gr.Error: If no file was provided.
    """
    if file is None:
        raise gr.Error("No file provided.")

    progress(0, desc="Analyse du fichier...")

    # Accept both a plain path string and a file-like object exposing `.name`.
    path = getattr(file, "name", file)
    images = _load_images(path, progress)

    results = []
    for i, img in enumerate(images):
        progress(i/len(images), desc=f"Extraction page {i+1}/{len(images)}")
        blocks = client.two_step_extract(img)
        texts = (_block_text(block) for block in blocks)
        results.append("\n".join(text for text in texts if text is not None))

    progress(1, desc="Extraction terminée ✅")
    return "\n\n--- PAGE ---\n\n".join(results)


demo = gr.Interface(
    fn=extract_from_file,
    inputs=gr.File(type="filepath", label="Upload PDF or Image"),
    outputs=gr.Textbox(label="Extracted Text", lines=20),
    title="MinerU2.5 Document Extractor",
    description="Upload a PDF or Image to extract structured text using MinerU2.5 with GPU.",
)

demo.launch()