Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| import faiss | |
# Root logger: INFO and above, "timestamp - level - message" layout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Process-wide switches, all set to "1". Judging by their names they are read
# by the Hugging Face Hub / CUDA / PyTorch libraries -- NOTE(review): confirm
# the exact effect of each flag against those libraries' documentation.
for _flag in (
    "HF_HUB_DISABLE_SYMLINKS_WARNING",
    "CUDA_LAUNCH_BLOCKING",
    "TORCH_USE_CUDA_DSA",
):
    os.environ[_flag] = "1"
def ConfigValues(service="Search", inputs="file.pdf"):
    """Assemble the configuration dictionary for one service.

    All paths are plain strings built relative to the current working
    directory (they start with ``./``); this function performs no file-system
    access and does not check that anything exists.

    Args:
        service: Service name. It is the document base name under
            ``./Documents`` and both the sub-folder and the file-name prefix
            under ``./Database``.
        inputs: File name looked up inside ``./Private/Tests``.

    Returns:
        dict: Input, asset, document and database paths; the two data keys;
        the ``faiss.IndexFlatIP`` attribute (returned as-is, not called);
        the model identifier strings; and the word limit.
    """
    # Shared prefixes from which the individual file paths are derived.
    assets_dir = "./Assets"
    document_stem = f"./Documents/{service}"
    database_stem = f"./Database/{service}/{service}"
    extract_stem = f"{database_stem}_Extract"
    chunks_stem = f"{database_stem}_Chunks"
    embedding_stem = f"{database_stem}_Embedding"

    return {
        # Input and document files
        "inputPath": f"./Private/Tests/{inputs}",
        "PdfPath": f"{document_stem}.pdf",
        "DocPath": f"{document_stem}.docx",
        # Asset files
        "exceptPath": f"{assets_dir}/ex.exceptions.json",
        "markerPath": f"{assets_dir}/ex.markers.json",
        "statusPath": f"{assets_dir}/ex.status.json",
        # Database: raw extraction output
        "RawDataPath": f"{extract_stem}_Raw.json",
        "RawLvlsPath": f"{extract_stem}_Levels.json",
        # Database: chunking output
        "StructsPath": f"{chunks_stem}_Struct.json",
        "SegmentPath": f"{chunks_stem}_Segment.json",
        "SchemaPath": f"{chunks_stem}_Schema.json",
        # Database: embedding output
        "FaissPath": f"{embedding_stem}_Index.faiss",
        "MappingPath": f"{embedding_stem}_Mapping.json",
        "MapDataPath": f"{embedding_stem}_MapData.json",
        "MapChunkPath": f"{embedding_stem}_MapChunk.json",
        "MetaPath": f"{embedding_stem}_Meta.json",
        # Keys
        "DATA_KEY": "contents",
        "EMBE_KEY": "embeddings",
        # Models ("SEARCH_EGINE" spelling is the key callers receive; keep it)
        "SEARCH_EGINE": faiss.IndexFlatIP,
        "RERANK_MODEL": "BAAI/bge-reranker-base",
        "RESPON_MODEL": "gpt-3.5-turbo",
        "CHUNKS_MODEL": "paraphrase-multilingual-MiniLM-L12-v2",
        "EMBEDD_MODEL": "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
        "SUMARY_MODEL": "LongK171/bartpho-syllable-vnexpress",
        "WORD_LIMIT": 1000,
    }