# Source: doc-ai-api / Config/Configs.py (uploaded by LongK171, commit "try" 5e92cb9, 2.75 kB)
import logging
import os
import faiss
# Root logger setup performed at import time: INFO threshold, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Process-wide environment flags, also set at import time.
# NOTE(review): presumably these silence the Hugging Face Hub symlink warning
# and make CUDA failures easier to trace -- confirm against the library docs.
os.environ.update(
    {
        "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
        "CUDA_LAUNCH_BLOCKING": "1",
        "TORCH_USE_CUDA_DSA": "1",
    }
)
def ConfigValues(service="Search", inputs="file.pdf"):
    """Assemble the file paths, data keys and model names for one service.

    Args:
        service: Name used as the document file stem under ``./Documents``
            and as both the sub-folder and the file prefix under
            ``./Database``.
        inputs: File name resolved under ``./Private/Tests``.

    Returns:
        dict: Setting name -> value. Values are path strings, the two data
        keys, the ``faiss.IndexFlatIP`` class (not an instance), model
        identifier strings and the integer word limit.
    """
    # Input file.
    input_path = f"./Private/Tests/{inputs}"

    # Source documents: one shared stem, two extensions.
    doc_stem = f"./Documents/{service}"
    pdf_path = f"{doc_stem}.pdf"
    docx_path = f"{doc_stem}.docx"

    # Static asset files.
    assets_dir = "./Assets"

    # Database artefacts all share the "<service>/<service>" prefix and are
    # grouped by stage: extraction, chunking, embedding.
    db_stem = f"./Database/{service}/{service}"
    extract_stem = f"{db_stem}_Extract"
    chunks_stem = f"{db_stem}_Chunks"
    embedding_stem = f"{db_stem}_Embedding"

    return {
        # Paths
        "inputPath": input_path,
        "PdfPath": pdf_path,
        "DocPath": docx_path,
        "exceptPath": f"{assets_dir}/ex.exceptions.json",
        "markerPath": f"{assets_dir}/ex.markers.json",
        "statusPath": f"{assets_dir}/ex.status.json",
        "RawDataPath": f"{extract_stem}_Raw.json",
        "RawLvlsPath": f"{extract_stem}_Levels.json",
        "StructsPath": f"{chunks_stem}_Struct.json",
        "SegmentPath": f"{chunks_stem}_Segment.json",
        "SchemaPath": f"{chunks_stem}_Schema.json",
        "FaissPath": f"{embedding_stem}_Index.faiss",
        "MappingPath": f"{embedding_stem}_Mapping.json",
        "MapDataPath": f"{embedding_stem}_MapData.json",
        "MapChunkPath": f"{embedding_stem}_MapChunk.json",
        "MetaPath": f"{embedding_stem}_Meta.json",
        # Keys
        "DATA_KEY": "contents",
        "EMBE_KEY": "embeddings",
        # Models (the "SEARCH_EGINE" spelling is part of the returned
        # interface and is kept exactly as-is).
        "SEARCH_EGINE": faiss.IndexFlatIP,
        "RERANK_MODEL": "BAAI/bge-reranker-base",
        "RESPON_MODEL": "gpt-3.5-turbo",
        "CHUNKS_MODEL": "paraphrase-multilingual-MiniLM-L12-v2",
        "EMBEDD_MODEL": "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
        "SUMARY_MODEL": "LongK171/bartpho-syllable-vnexpress",
        "WORD_LIMIT": 1000,
    }