# NOTE(review): recovered from a whitespace/encoding-mangled paste. Garbled
# emoji bytes in log/prompt strings were replaced with plain ASCII, and the
# truncated success/except tail of reconcile_statements_openai was
# reconstructed from the surviving fragments.

import logging
import os
import sys
import uuid
from datetime import datetime

import boto3
import gradio as gr
import openai
import pandas as pd

# --- Logging: stream to stdout (visible in HF Spaces logs) and append to a file ---
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('app.log', mode='a'),
    ],
)
logger = logging.getLogger(__name__)

logger.info("Starting ERP Reconciliation Tool")
logger.info("Application started at: %s", datetime.now())

# --- Configuration from environment variables ---
try:
    openai.api_key = os.environ.get("Open_ai_key")
    bucket_name = os.environ.get("Bucket_name")
    if not openai.api_key:
        logger.warning("OpenAI API key not found in environment variables")
    else:
        logger.info("OpenAI API key loaded successfully")
    if not bucket_name:
        logger.warning("S3 bucket name not found in environment variables")
    else:
        logger.info("S3 bucket configured: %s", bucket_name)
except Exception as e:
    logger.error("Error loading environment variables: %s", e)

# --- AWS session (credentials from env; may be unset on a dev box) ---
try:
    session = boto3.Session(
        aws_access_key_id=os.getenv("access_key"),
        aws_secret_access_key=os.getenv("secret_access_key"),
    )
    logger.info("AWS session created successfully")
except Exception as e:
    logger.error("Error creating AWS session: %s", e)


def upload_files_to_s3(erp_file_path, external_file_path, content, bucket_name):
    """Upload both input statements and the reconciliation result to S3.

    Each file is uploaded twice: into type-based folders ("ERP Statements/",
    "Bank Statements/", "Reconciliation Results/") and into a per-run
    "Combined Files/<session_id>/" folder so one run can be retrieved as a
    unit.

    Args:
        erp_file_path: Local path of the uploaded ERP CSV.
        external_file_path: Local path of the uploaded bank/vendor CSV.
        content: Reconciliation result text to persist as a .txt file.
        bucket_name: Target S3 bucket (shadows the module-level value on purpose).

    Returns:
        dict with the S3 keys used and the generated ``session_id``.

    Raises:
        Re-raises any boto3 / file I/O error after logging it.
    """
    logger.info("Starting S3 upload process")
    try:
        s3 = session.client("s3")
        session_id = str(uuid.uuid4())
        logger.info("Generated session ID: %s", session_id)

        base_erp = os.path.splitext(os.path.basename(erp_file_path))[0]
        base_bank = os.path.splitext(os.path.basename(external_file_path))[0]

        # Session ID in the filename keeps repeated uploads of the same
        # statement from overwriting each other.
        erp_filename = f"{base_erp}_{session_id}.csv"
        bank_filename = f"{base_bank}_{session_id}.csv"
        txt_filename = f"{session_id}.txt"
        logger.info(
            "Generated filenames - ERP: %s, Bank: %s, Result: %s",
            erp_filename, bank_filename, txt_filename,
        )

        # Write the reconciliation output to a local scratch file for upload.
        with open(txt_filename, "w", encoding="utf-8") as file:
            file.write(content)
        logger.info("Reconciliation result written to local file: %s", txt_filename)

        # === Separate folders by type ===
        erp_s3_key = f"ERP Statements/{erp_filename}"
        bank_s3_key = f"Bank Statements/{bank_filename}"
        txt_s3_key = f"Reconciliation Results/{txt_filename}"

        # === Combined folder for each run ===
        combined_prefix = f"Combined Files/{session_id}/"
        erp_combined_key = combined_prefix + erp_filename
        bank_combined_key = combined_prefix + bank_filename
        txt_combined_key = combined_prefix + txt_filename

        logger.info("Uploading files to type-based S3 folders...")
        s3.upload_file(erp_file_path, bucket_name, erp_s3_key)
        logger.info("Uploaded ERP file to: %s", erp_s3_key)
        s3.upload_file(external_file_path, bucket_name, bank_s3_key)
        logger.info("Uploaded Bank file to: %s", bank_s3_key)
        s3.upload_file(txt_filename, bucket_name, txt_s3_key)
        logger.info("Uploaded result file to: %s", txt_s3_key)

        logger.info("Uploading files to combined S3 folder...")
        s3.upload_file(erp_file_path, bucket_name, erp_combined_key)
        s3.upload_file(external_file_path, bucket_name, bank_combined_key)
        s3.upload_file(txt_filename, bucket_name, txt_combined_key)
        logger.info("All files uploaded to combined folder: %s", combined_prefix)

        # Clean up the local scratch file.
        os.remove(txt_filename)
        logger.info("Cleaned up local file: %s", txt_filename)

        upload_result = {
            "erp_s3_key": erp_s3_key,
            "bank_s3_key": bank_s3_key,
            "result_s3_key": txt_s3_key,
            "combined_keys": [erp_combined_key, bank_combined_key, txt_combined_key],
            "session_id": session_id,
        }
        logger.info("S3 upload process completed successfully for session: %s", session_id)
        return upload_result
    except Exception as e:
        logger.error("Error in S3 upload process: %s", e)
        raise  # bare raise preserves the original traceback


def _file_path(file):
    """Return the local path for a Gradio file input.

    ``gr.File(type="filepath")`` passes a plain ``str`` to the callback, while
    the older ``type="file"`` passed a tempfile wrapper exposing ``.name``.
    The original code assumed ``.name`` unconditionally; accept both.
    """
    return file if isinstance(file, str) else file.name


def extract_transactions(file):
    """Read an uploaded CSV statement and render it as a plain-text table.

    Args:
        file: Gradio file input (str path, or object exposing ``.name``).

    Returns:
        str: the DataFrame rendered via ``to_string(index=False)``.

    Raises:
        ValueError: for non-CSV files or unreadable CSV content.
    """
    path = _file_path(file)
    logger.info("Processing file: %s", path)
    if not path.lower().endswith(".csv"):
        # Original message printed a literal "(unknown)"; report the real name.
        error_msg = f"Unsupported file format: {os.path.basename(path)}. Please upload a CSV file only."
        logger.error(error_msg)
        raise ValueError(error_msg)
    try:
        df = pd.read_csv(path)
        logger.info("Successfully read CSV file: %s", path)
        logger.info("File contains %d rows and %d columns", len(df), len(df.columns))
        logger.info("Column names: %s", list(df.columns))
        return df.to_string(index=False)
    except Exception as e:
        error_msg = f"Error processing {path}: {e}"
        logger.error(error_msg)
        raise ValueError(error_msg)


def reconcile_statements_openai(erp_file, external_file):
    """Drive one reconciliation run; generator for the Gradio click handler.

    Yields ``(status_markdown, result_html, download_path)`` tuples matching
    the UI's ``[status, result, download]`` outputs: first a "processing"
    placeholder, then either the final result or an error message.

    Args:
        erp_file: Gradio file input for the ERP statement.
        external_file: Gradio file input for the bank/vendor statement.
    """
    session_start = datetime.now()
    logger.info("Starting reconciliation process at: %s", session_start)

    # Guard clause: both files are mandatory.
    if not erp_file or not external_file:
        error_msg = "Both ERP and External files are required"
        logger.error(error_msg)
        yield "Error: Missing files", f"<p style='color:red;'>{error_msg}</p>", None
        return

    logger.info("ERP file: %s", _file_path(erp_file))
    logger.info("External file: %s", _file_path(external_file))
    yield "Processing your request...", "", None

    try:
        logger.info("Extracting data from ERP file...")
        erp_data = extract_transactions(erp_file)
        logger.info("Extracting data from External file...")
        external_data = extract_transactions(external_file)

        prompt = f"""
You are a financial analyst specializing in account reconciliations. Your task is to compare two data sets: one from an ERP system and the other from a Bank or Vendor statement.
The goal is to identify which transactions match across both data sets, and which transactions are unmatched or potentially erroneous.
Each dataset contains transaction entries with **Date**, **Amount**, and **Description**.
ERP descriptions may include prefixes like "Vendor Payment - ", while external descriptions are simpler.
Please attempt to normalize and fuzzy-match transactions by:
- Ignoring common prefixes/suffixes
- Allowing for small amount rounding differences (+/- $0.01 to $1)
- Matching based on partial vendor or keyword overlaps
---
Please follow this format in your response:
1. **Introduction**
Briefly explain what reconciliation means and how you'll approach it.
2. **Matched Transactions**
Make a table to show side by side comparison of matched transactions
3. **Unmatched Transactions**
List any transactions found in
a. External file but not in the ERP file OR
b. ERP file but Not in External file
4. **Summary & Suggested Next Steps**
Explain what the discrepancies might mean and what the user should do next.
---
Here is the ERP data:
{erp_data}
---
Here is the External (Bank or Vendor) data:
{external_data}
"""

        logger.info("Sending request to OpenAI...")
        logger.info("Prompt length: %d characters", len(prompt))
        response = openai.ChatCompletion.create(
            # NOTE(review): confirm this model id is valid for the pinned
            # (pre-1.0) openai SDK this legacy ChatCompletion call requires.
            model="gpt-5",
            messages=[
                {"role": "system",
                 "content": "You are a financial analyst who specializes in reconciling financial data."},
                {"role": "user", "content": prompt},
            ],
            temperature=1,
        )
        logger.info("Received response from OpenAI")
        content = response.choices[0].message['content']
        logger.info("Response length: %d characters", len(content))

        # pre-wrap keeps the model's markdown-ish layout readable in gr.HTML.
        html = f"<div style='white-space: pre-wrap;'>{content}</div>"

        # Best-effort archival to S3: a failed upload must not lose the result.
        try:
            upload_files_to_s3(_file_path(erp_file), _file_path(external_file), content, bucket_name)
        except Exception as s3_error:
            logger.error("S3 upload failed (continuing): %s", s3_error)

        # Local copy feeds the download widget (original always yielded None).
        report_path = f"reconciliation_report_{uuid.uuid4()}.txt"
        with open(report_path, "w", encoding="utf-8") as fh:
            fh.write(content)

        logger.info("Reconciliation finished in %s", datetime.now() - session_start)
        yield "Reconciliation complete", html, report_path
    except Exception as e:
        error_msg = f"Reconciliation failed: {e}"
        logger.exception(error_msg)
        yield "Error", f"<p style='color:red;'>{error_msg}</p>", None
# --- Gradio UI ---
# Two CSV uploads in, three outputs: a status line, the rendered HTML result,
# and a downloadable report file. The click handler is a generator, so Gradio
# streams the intermediate "processing" status before the final result.
with gr.Blocks(css="""#company-logo { width: 25%; margin: auto; display: block; }""") as iface:
    gr.Image("logo_Icon.png", elem_id="company-logo", label="Beiing Human")
    gr.Markdown("## ERP vs Bank/Vendor Reconciliation Tool")
    with gr.Row():
        erp_file = gr.File(label="Upload ERP Statement (CSV only)", type="filepath", file_types=[".csv"])
        external_file = gr.File(label="Upload Bank or Vendor Statement (CSV only)", type="filepath", file_types=[".csv"])
    btn = gr.Button("Reconcile")
    status = gr.Markdown()
    result = gr.HTML()
    download = gr.File(label="Download Reconciliation Report", type="filepath")
    btn.click(
        fn=reconcile_statements_openai,
        inputs=[erp_file, external_file],
        outputs=[status, result, download],
    )

iface.launch()