Spaces:
Sleeping
Sleeping
File size: 2,747 Bytes
dbe2c62 5e92cb9 dbe2c62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import logging
import os
import faiss
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Process-wide switches, set unconditionally at import time (any value already
# present in the environment is overwritten).
# NOTE(review): presumably these must be set before the Hugging Face / PyTorch
# libraries that read them are imported — confirm against the import order of
# the modules that use this config.
os.environ.update({
    "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
    "CUDA_LAUNCH_BLOCKING": "1",
    "TORCH_USE_CUDA_DSA": "1",
})
def ConfigValues(service="Search", inputs="file.pdf"):
    """Build the configuration dictionary for one service.

    All paths are plain strings assembled from fixed relative folders
    (``./Private/Tests``, ``./Assets``, ``./Documents``, ``./Database``).
    Nothing is created or checked on disk by this function.

    Args:
        service: Name used as the document file stem and as the
            ``./Database/<service>/<service>_*`` artifact prefix.
        inputs: File name appended to the ``./Private/Tests`` folder.

    Returns:
        A dict holding the input, asset, document and database paths, the
        two data keys, the FAISS index class, the model identifiers and the
        word limit.
    """
    assets_dir = "./Assets"
    document_stem = f"./Documents/{service}"

    # Every database artifact shares the "<service>/<service>" prefix and is
    # then grouped by pipeline stage: extract, chunks, embedding.
    db_prefix = f"./Database/{service}/{service}"
    extract_prefix = f"{db_prefix}_Extract"
    chunks_prefix = f"{db_prefix}_Chunks"
    embedding_prefix = f"{db_prefix}_Embedding"

    return {
        # Inputs
        "inputPath": f"./Private/Tests/{inputs}",
        # Documents
        "PdfPath": f"{document_stem}.pdf",
        "DocPath": f"{document_stem}.docx",
        # Assets
        "exceptPath": f"{assets_dir}/ex.exceptions.json",
        "markerPath": f"{assets_dir}/ex.markers.json",
        "statusPath": f"{assets_dir}/ex.status.json",
        # Database: raw extraction
        "RawDataPath": f"{extract_prefix}_Raw.json",
        "RawLvlsPath": f"{extract_prefix}_Levels.json",
        # Database: chunking
        "StructsPath": f"{chunks_prefix}_Struct.json",
        "SegmentPath": f"{chunks_prefix}_Segment.json",
        "SchemaPath": f"{chunks_prefix}_Schema.json",
        # Database: embeddings and index
        "FaissPath": f"{embedding_prefix}_Index.faiss",
        "MappingPath": f"{embedding_prefix}_Mapping.json",
        "MapDataPath": f"{embedding_prefix}_MapData.json",
        "MapChunkPath": f"{embedding_prefix}_MapChunk.json",
        "MetaPath": f"{embedding_prefix}_Meta.json",
        # Keys
        "DATA_KEY": "contents",
        "EMBE_KEY": "embeddings",
        # Models (the index class itself is returned, not an instance)
        "SEARCH_EGINE": faiss.IndexFlatIP,
        "RERANK_MODEL": "BAAI/bge-reranker-base",
        "RESPON_MODEL": "gpt-3.5-turbo",
        "CHUNKS_MODEL": "paraphrase-multilingual-MiniLM-L12-v2",
        "EMBEDD_MODEL": "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
        "SUMARY_MODEL": "LongK171/bartpho-syllable-vnexpress",
        "WORD_LIMIT": 1000,
    }
|