File size: 5,680 Bytes
dbe2c62
 
 
 
 
 
ba2281a
dbe2c62
 
 
 
 
 
 
ef9710d
dbe2c62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba2281a
ef9710d
 
 
 
 
 
 
 
ba2281a
 
 
 
 
 
 
 
 
 
 
 
 
dbe2c62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
FastAPI gateway for the App_Caller pipeline.

✅ Keeps the original pipeline (App_Caller.py) unchanged
✅ Compatible with Hugging Face Spaces (Docker)
✅ Optional Bearer-token auth and Swagger UI (/docs)
✅ Endpoints: /, /health, /process_pdf, /search, /summarize
"""

import hmac
import os
import time
from typing import Optional

from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# -------------------------
# 🔒 Bearer token (optional)
# -------------------------
# Shared secret read from the API_SECRET environment variable (surrounding
# whitespace stripped). An empty value -- the default -- disables authentication.
API_SECRET = os.getenv("API_SECRET", "").strip()

def require_bearer(authorization: Optional[str] = Header(None)):
    """Validate the ``Authorization: Bearer <token>`` header against API_SECRET.

    Intended to be used as a FastAPI dependency. When API_SECRET is empty the
    check is skipped and every request is accepted.

    Args:
        authorization: Raw value of the ``Authorization`` request header, or
            None when the header is absent.

    Raises:
        HTTPException: 401 when the header is missing or does not use the
            Bearer scheme; 403 when the presented token does not match
            API_SECRET.
    """
    if not API_SECRET:
        return  # Authentication is disabled.

    # Auth scheme names are case-insensitive, so "bearer" is accepted as well.
    scheme, sep, credentials = (authorization or "").partition(" ")
    if not sep or scheme.lower() != "bearer":
        raise HTTPException(
            status_code=401,
            detail="Missing Bearer token",
            headers={"WWW-Authenticate": "Bearer"},
        )

    token = credentials.strip()
    # Constant-time comparison avoids leaking how much of the secret matched.
    # Compare bytes: compare_digest() rejects non-ASCII str arguments.
    if not hmac.compare_digest(token.encode("utf-8"), API_SECRET.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid token")

# -------------------------
# 🧩 Import project modules
# -------------------------
try:
    # The pipeline module is optional at startup: the API must still boot
    # (and report the problem via /health) when it cannot be imported.
    import App_Caller as APP_CALLER
    print("✅ Đã load App_Caller.")
except Exception as e:
    # Any import-time failure is tolerated; endpoints test APP_CALLER and
    # answer with HTTP 500 while it is None.
    APP_CALLER = None
    print(f"⚠️ Không thể import App_Caller: {e}")

# -------------------------
# 🚀 Init FastAPI
# -------------------------
# Application object; the metadata below is what the generated docs display.
app = FastAPI(
    description="API xử lý PDF: trích xuất, tóm tắt, tìm kiếm, phân loại.",
    version="2.0.0",
    title="Document AI API (FastAPI)",
)

# Allow browser-based web clients to call the API from any origin.
# NOTE(review): wildcard origins together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation -- consider listing explicit
# origins or disabling credentials if cookies are not needed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)

# -------------------------
# 🏠 Root endpoint (avoids a 404 on Spaces)
# -------------------------
@app.get("/")
def root():
    """Welcome page that doubles as a basic status check.

    Returns:
        A dict with a greeting message, a fixed ``"ok"`` status, the docs
        path and the list of main endpoints.
    """
    payload = {
        "message": "📘 Document AI API đang chạy.",
        "status": "ok",
    }
    payload["docs"] = "/docs"
    payload["endpoints"] = ["/process_pdf", "/search", "/summarize", "/health"]
    return payload

# -------------------------
# 🩺 /health
# -------------------------
@app.get("/health")
def health(_=Depends(require_bearer)):
    """Report whether the service and its pipeline module are available.

    Args:
        _: Placeholder for the bearer-token dependency; its value is unused.

    Returns:
        A dict with ``status``, the current Unix ``time``, whether App_Caller
        was imported, and whether it exposes ``fileProcess``.
    """
    caller_loaded = bool(APP_CALLER)
    # Short-circuits to False when the module failed to import.
    can_process = caller_loaded and hasattr(APP_CALLER, "fileProcess")
    return {
        "status": "ok",
        "time": time.time(),
        "App_Caller": caller_loaded,
        "has_fileProcess": can_process,
    }

# -------------------------
# 📘 /process_pdf
# -------------------------
@app.post("/process_pdf")
async def process_pdf(file: UploadFile = File(...), _=Depends(require_bearer)):
    """Run an uploaded PDF through App_Caller.fileProcess and return its output.

    Args:
        file: Uploaded file; its name must end with ``.pdf`` (case-insensitive).
        _: Placeholder for the bearer-token dependency; its value is unused.

    Returns:
        A dict with ``status`` set to ``"success"`` plus the ``checkstatus``,
        ``summary`` and ``category`` values of the pipeline result, and its
        ``reranked`` value exposed as ``top_candidates`` (empty list if absent).

    Raises:
        HTTPException: 400 when the file name is missing or not ``.pdf`` or the
            upload is empty; 500 when App_Caller.fileProcess is unavailable or
            the pipeline raises.
    """
    # Function-scope import: only this endpoint needs the thread-pool helper.
    from fastapi.concurrency import run_in_threadpool

    # The file name is optional in multipart uploads, so guard against None.
    if not (file.filename or "").lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Chỉ chấp nhận file PDF.")

    # Verify the pipeline exists before spending time reading the upload.
    if not APP_CALLER or not hasattr(APP_CALLER, "fileProcess"):
        raise HTTPException(status_code=500, detail="Không tìm thấy App_Caller.fileProcess().")

    pdf_bytes = await file.read()
    if not pdf_bytes:
        # An empty payload can never be a valid PDF; report it as a client error.
        raise HTTPException(status_code=400, detail="File PDF rỗng.")

    try:
        # fileProcess is a blocking call; run it in the thread pool so the
        # event loop keeps serving other requests in the meantime.
        result = await run_in_threadpool(APP_CALLER.fileProcess, pdf_bytes)
        return {
            "status": "success",
            "checkstatus": result.get("checkstatus"),
            "summary": result.get("summary"),
            "category": result.get("category"),
            "top_candidates": result.get("reranked", []),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Lỗi xử lý PDF: {str(e)}") from e

# -------------------------
# 🔍 /search
# -------------------------
class SearchIn(BaseModel):
    # Request body for POST /search.
    # query: search text; the endpoint rejects it when blank after stripping.
    query: str
    # k: maximum number of results to return (used as a slice bound); default 10.
    k: int = 10

@app.post("/search")
def search(body: SearchIn, _=Depends(require_bearer)):
    """Search through App_Caller.runSearch() and return at most ``k`` results.

    The original author describes the backend as FAISS + rerank; that logic
    lives in App_Caller and is not visible here.

    Args:
        body: Request payload carrying the query text and the result limit.
        _: Placeholder for the bearer-token dependency; its value is unused.

    Returns:
        A dict with ``status`` set to ``"success"`` and ``results``: the first
        ``k`` items of a list result (or of the ``"results"`` entry of a dict
        result), otherwise a one-element list with the result's string form.

    Raises:
        HTTPException: 400 when the query is blank; 500 when
            App_Caller.runSearch is unavailable or the search raises.
    """
    q = (body.query or "").strip()
    if not q:
        raise HTTPException(status_code=400, detail="query không được để trống")

    if not APP_CALLER or not hasattr(APP_CALLER, "runSearch"):
        raise HTTPException(status_code=500, detail="Không tìm thấy App_Caller.runSearch().")

    # A negative k would turn the slice into "drop the last |k| items";
    # clamp it so k always acts as an upper bound on the result count.
    limit = max(body.k, 0)

    try:
        results = APP_CALLER.runSearch(q)
        if isinstance(results, list):
            formatted = results[:limit]
        elif isinstance(results, dict) and "results" in results:
            formatted = results["results"][:limit]
        else:
            # Unknown result shape: fall back to its string representation.
            formatted = [str(results)]
        return {"status": "success", "results": formatted}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Lỗi tìm kiếm: {str(e)}") from e

# -------------------------
# 🧠 /summarize
# -------------------------
class SummIn(BaseModel):
    # Request body for POST /summarize.
    # text: text to summarize; the endpoint rejects it when blank after stripping.
    text: str
    # minInput / maxInput: forwarded unchanged as the keyword arguments of the
    # same names to summarizer_engine.summarize(); presumably length bounds --
    # TODO confirm against App_Caller.
    minInput: int = 256
    maxInput: int = 1024

@app.post("/summarize")
def summarize_text(body: SummIn, _=Depends(require_bearer)):
    """Summarize text with App_Caller.summarizer_engine.

    Args:
        body: Request payload with the text and the minInput/maxInput values
            passed through to the summarizer.
        _: Placeholder for the bearer-token dependency; its value is unused.

    Returns:
        A dict with ``status`` set to ``"success"`` and ``summary`` taken from
        the ``summary_text`` entry of the summarizer output (empty string if
        that entry is absent).

    Raises:
        HTTPException: 400 when the text is blank; 500 when the summarizer
            engine is unavailable or summarization raises.
    """
    cleaned = (body.text or "").strip()
    if cleaned == "":
        raise HTTPException(status_code=400, detail="text không được để trống")

    engine_ready = bool(APP_CALLER) and hasattr(APP_CALLER, "summarizer_engine")
    if not engine_ready:
        raise HTTPException(status_code=500, detail="Không tìm thấy App_Caller.summarizer_engine.")

    try:
        engine = APP_CALLER.summarizer_engine
        outcome = engine.summarize(
            cleaned,
            minInput=body.minInput,
            maxInput=body.maxInput,
        )
        summary = outcome.get("summary_text", "")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Lỗi tóm tắt: {str(e)}")

    return {"status": "success", "summary": summary}