# Statements-Reconciliation / app_chatgpt.py
# rishisriv-bh's picture
# Update app_chatgpt.py
# 16f1089 verified
import openai
import pandas as pd
import os
import gradio as gr
import boto3
import uuid
import logging
import sys
from datetime import datetime
# Configure root logging once at import time: every record is sent both to
# stdout and to an append-mode log file in the working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),  # This will show in HF Spaces logs
        # The log messages contain non-ASCII symbols; pin the file encoding
        # so writing them does not depend on the platform's default locale.
        logging.FileHandler('app.log', mode='a', encoding='utf-8'),  # Also save to file
    ],
)
logger = logging.getLogger(__name__)

# Log application startup
logger.info("πŸš€ Starting ERP Reconciliation Tool")
logger.info(f"πŸ“… Application started at: {datetime.now()}")
# Read the OpenAI credential and the S3 bucket name from the environment and
# report, for each one, whether a value was found. A missing value is only
# logged as a warning here; nothing is raised at import time.
try:
    openai.api_key = os.getenv("Open_ai_key")
    bucket_name = os.getenv("Bucket_name")

    if openai.api_key:
        logger.info("βœ… OpenAI API key loaded successfully")
    else:
        logger.warning("⚠️ OpenAI API key not found in environment variables")

    if bucket_name:
        logger.info(f"βœ… S3 bucket configured: {bucket_name}")
    else:
        logger.warning("⚠️ S3 bucket name not found in environment variables")
except Exception as exc:
    logger.error(f"❌ Error loading environment variables: {exc}")
# Create the module-wide boto3 session from the "access_key" and
# "secret_access_key" environment variables. A failure is logged, not raised.
try:
    session = boto3.Session(
        aws_access_key_id=os.environ.get("access_key"),
        aws_secret_access_key=os.environ.get("secret_access_key"),
    )
    logger.info("βœ… AWS session created successfully")
except Exception as exc:
    logger.error(f"❌ Error creating AWS session: {exc}")
def upload_files_to_s3(erp_file_path, external_file_path, content, bucket_name):
    """Upload both input statements and the reconciliation result to S3.

    A fresh UUID identifies each call. Every file is stored twice: once under
    a per-type prefix ("ERP Statements/", "Bank Statements/",
    "Reconciliation Results/") and once under "Combined Files/<uuid>/".

    Args:
        erp_file_path: Local path of the ERP statement file.
        external_file_path: Local path of the bank/vendor statement file.
        content: Text written to the "<uuid>.txt" result object.
        bucket_name: Name of the destination S3 bucket.

    Returns:
        A dict with the keys "erp_s3_key", "bank_s3_key", "result_s3_key",
        "combined_keys" (list of the three combined-folder keys) and
        "session_id".

    Raises:
        ValueError: If ``bucket_name`` is empty.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    import tempfile  # only needed here; keeps the file-level imports untouched

    logger.info("πŸ“€ Starting S3 upload process")
    try:
        # Fail fast with a clear message instead of a parameter-validation
        # error from deep inside the S3 client.
        if not bucket_name:
            raise ValueError("S3 bucket name is not configured")

        s3 = session.client("s3")
        session_id = str(uuid.uuid4())
        logger.info(f"πŸ†” Generated session ID: {session_id}")

        # Object names are "<original base name>_<uuid>.csv"; the original
        # extension is dropped and ".csv" is always appended.
        base_erp = os.path.splitext(os.path.basename(erp_file_path))[0]
        base_bank = os.path.splitext(os.path.basename(external_file_path))[0]
        erp_filename = f"{base_erp}_{session_id}.csv"
        bank_filename = f"{base_bank}_{session_id}.csv"
        txt_filename = f"{session_id}.txt"
        logger.info(f"πŸ“„ Generated filenames - ERP: {erp_filename}, Bank: {bank_filename}, Result: {txt_filename}")

        # Keys in the per-type folders.
        erp_s3_key = f"ERP Statements/{erp_filename}"
        bank_s3_key = f"Bank Statements/{bank_filename}"
        txt_s3_key = f"Reconciliation Results/{txt_filename}"

        # Keys in the per-run combined folder.
        combined_prefix = f"Combined Files/{session_id}/"
        erp_combined_key = combined_prefix + erp_filename
        bank_combined_key = combined_prefix + bank_filename
        txt_combined_key = combined_prefix + txt_filename

        # The result text must exist as a local file for upload_file(). A
        # temporary directory guarantees it is removed even when an upload
        # raises, and keeps the working directory clean.
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_txt_path = os.path.join(scratch_dir, txt_filename)
            with open(local_txt_path, "w", encoding="utf-8") as file:
                file.write(content)
            logger.info(f"βœ… Reconciliation result written to local file: {txt_filename}")

            logger.info("πŸ“€ Uploading files to type-based S3 folders...")
            typed_uploads = (
                ("ERP", erp_file_path, erp_s3_key),
                ("Bank", external_file_path, bank_s3_key),
                ("result", local_txt_path, txt_s3_key),
            )
            for label, local_path, key in typed_uploads:
                s3.upload_file(local_path, bucket_name, key)
                logger.info(f"βœ… Uploaded {label} file to: {key}")

            logger.info("πŸ“€ Uploading files to combined S3 folder...")
            combined_uploads = (
                (erp_file_path, erp_combined_key),
                (external_file_path, bank_combined_key),
                (local_txt_path, txt_combined_key),
            )
            for local_path, key in combined_uploads:
                s3.upload_file(local_path, bucket_name, key)
            logger.info(f"βœ… All files uploaded to combined folder: {combined_prefix}")

        logger.info(f"πŸ—‘οΈ Cleaned up local file: {txt_filename}")

        upload_result = {
            "erp_s3_key": erp_s3_key,
            "bank_s3_key": bank_s3_key,
            "result_s3_key": txt_s3_key,
            "combined_keys": [erp_combined_key, bank_combined_key, txt_combined_key],
            "session_id": session_id,
        }
        logger.info(f"βœ… S3 upload process completed successfully for session: {session_id}")
        return upload_result
    except Exception as exc:
        # Record the traceback here, then let the caller decide what to do.
        logger.exception(f"❌ Error in S3 upload process: {exc}")
        raise
# ✅ Extract transactions from CSV only
def extract_transactions(file):
    """Read an uploaded CSV file and return its rows as one plain-text table.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``.name`` attribute.

    Returns:
        The CSV contents rendered with ``DataFrame.to_string(index=False)``.

    Raises:
        ValueError: If the path does not end in ".csv" (case-insensitive),
            or if reading the file fails for any reason; in the latter case
            the original exception is attached as the cause.
    """
    # Plain paths are used directly; anything else is expected to carry the
    # path in ``.name``. Paths are checked first because path objects also
    # have a ``.name`` that holds only the final component.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    logger.info(f"πŸ“Š Processing file: {path}")

    filename = path.lower()
    if not filename.endswith(".csv"):
        error_msg = f"Unsupported file format: {filename}. Please upload a CSV file only."
        logger.error(f"❌ {error_msg}")
        raise ValueError(error_msg)

    try:
        df = pd.read_csv(path)
        logger.info(f"βœ… Successfully read CSV file: {path}")
        logger.info(f"πŸ“ˆ File contains {len(df)} rows and {len(df.columns)} columns")
        logger.info(f"🏷️ Column names: {list(df.columns)}")
        return df.to_string(index=False)
    except Exception as e:
        error_msg = f"Error processing {path}: {e}"
        logger.error(f"❌ {error_msg}")
        # Callers get one exception type; the real cause stays reachable.
        raise ValueError(error_msg) from e
# ✅ Reconcile the statements using OpenAI
def _uploaded_path(upload):
    """Return the filesystem path of an upload given as a path or as an object with ``.name``."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def _build_reconciliation_prompt(erp_data, external_data):
    """Return the user prompt with both data sets embedded as text."""
    return f"""
You are a financial analyst specializing in account reconciliations. Your task is to compare two data sets: one from an ERP system and the other from a
Bank or Vendor statement.
The goal is to identify which transactions match across both data sets, and which transactions are unmatched or potentially erroneous.
Each dataset contains transaction entries with **Date**, **Amount**, and **Description**.
ERP descriptions may include prefixes like "Vendor Payment - ", while external descriptions are simpler.
Please attempt to normalize and fuzzy-match transactions by:
- Ignoring common prefixes/suffixes
- Allowing for small amount rounding differences (Β±$0.01–$1)
- Matching based on partial vendor or keyword overlaps
.
---
Please follow this format in your response:
1. πŸ“˜ **Introduction**
Briefly explain what reconciliation means and how you'll approach it.
2. βœ… **Matched Transactions**
Make a table to show side by side comparison of matched transactions
3. 🏦 **Unmatched Transactions**
List any transactions found in
a. External file but not in the ERP file OR
b. ERP file but Not in External file
4. 🧾 **Summary & Suggested Next Steps**
Explain what the discrepancies might mean and what the user should do next.
---
Here is the ERP data:
{erp_data}
---
Here is the External (Bank or Vendor) data:
{external_data}
"""


def reconcile_statements_openai(erp_file, external_file):
    """Reconcile an ERP statement against a bank/vendor statement via OpenAI.

    This is a generator. Each yielded item is a 3-tuple
    ``(status_text, html, download_path)``: a progress tuple first, then
    either the finished report or an error tuple. ``download_path`` is
    ``None`` except for the finished report.

    After the report has been yielded, the inputs and the rendered report are
    uploaded to S3 on a best-effort basis; an upload failure is logged and
    does not change what was yielded.

    Args:
        erp_file: Uploaded ERP statement, as a path or an object with ``.name``.
        external_file: Uploaded bank/vendor statement, same form.

    Yields:
        ``(str, str, str | None)`` tuples as described above.
    """
    import tempfile
    from html import escape

    session_start = datetime.now()
    logger.info(f"πŸ” Starting reconciliation process at: {session_start}")

    # Validate inputs
    if not erp_file or not external_file:
        error_msg = "Both ERP and External files are required"
        logger.error(f"❌ {error_msg}")
        yield "❌ Error: Missing files", f"<h3>Error</h3><pre>{error_msg}</pre>", None
        return

    erp_path = _uploaded_path(erp_file)
    external_path = _uploaded_path(external_file)
    logger.info(f"πŸ“ ERP file: {erp_path}")
    logger.info(f"🏦 External file: {external_path}")

    yield "⏳ Processing your request...", "", None

    try:
        # Extract data from files
        logger.info("πŸ“Š Extracting data from ERP file...")
        erp_data = extract_transactions(erp_file)
        logger.info("πŸ“Š Extracting data from External file...")
        external_data = extract_transactions(external_file)

        prompt = _build_reconciliation_prompt(erp_data, external_data)
        logger.info("πŸ€– Sending request to OpenAI...")
        logger.info(f"πŸ“ Prompt length: {len(prompt)} characters")

        # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
        # interface -- confirm the pinned openai version before upgrading.
        response = openai.ChatCompletion.create(
            model="gpt-5",
            messages=[
                {"role": "system",
                 "content": "You are a financial analyst who specializes in reconciling financial data."},
                {"role": "user", "content": prompt},
            ],
            temperature=1,
        )
        logger.info("βœ… Received response from OpenAI")

        content = response.choices[0].message['content']
        logger.info(f"πŸ“„ Response length: {len(content)} characters")

        # The model output is derived from user-uploaded data, so escape it
        # before embedding it in markup; a stray "<" or "&" would otherwise
        # be interpreted as HTML.
        report_html = f"""
<div style="font-family: 'Segoe UI', sans-serif; line-height: 1.5;">
<h2>πŸ” Reconciliation Report</h2>
<pre>{escape(content, quote=False)}</pre>
</div>
"""

        # Calculate processing time
        processing_time = (datetime.now() - session_start).total_seconds()
        logger.info(f"⏱️ Processing completed in {processing_time:.2f} seconds")

        # The file name only has one-second resolution, so give every request
        # its own directory: concurrent requests can no longer overwrite each
        # other's report. The directory is left in place because the file
        # must still exist after this generator has yielded its path.
        download_dir = tempfile.mkdtemp(prefix="reconciliation_")
        download_filename = os.path.join(
            download_dir,
            f"reconciliation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
        )
        with open(download_filename, "w", encoding="utf-8") as f:
            f.write(content)

        yield "βœ… Done!", report_html, download_filename

        # Upload the files + result to S3
        try:
            logger.info("πŸ“€ Starting S3 upload...")
            upload_result = upload_files_to_s3(
                erp_file_path=erp_path,
                external_file_path=external_path,
                content=report_html,
                bucket_name=bucket_name,
            )
            logger.info(f"βœ… Files uploaded successfully with session ID: {upload_result['session_id']}")
        except Exception as upload_error:
            logger.error(f"❌ S3 upload failed: {upload_error}")
            # Don't fail the entire process if S3 upload fails
            logger.info("⚠️ Continuing despite S3 upload failure")

        logger.info("πŸŽ‰ Reconciliation process completed successfully!")
    except Exception as e:
        error_msg = f"Error during reconciliation: {e}"
        # logger.exception records the message together with the traceback.
        logger.exception(f"❌ {error_msg}")
        yield "❌ Error occurred", f"<h3>Error</h3><pre>{escape(error_msg, quote=False)}</pre>", None
# ✅ Gradio UI
# Build the page: a logo, a title, two CSV upload slots side by side, a
# button, and three outputs (status text, HTML report, downloadable file)
# that are filled from the tuples yielded by reconcile_statements_openai.
with gr.Blocks(
    css="#company-logo { width: 25%; margin: auto; display: block; }"
) as iface:
    gr.Image("logo_Icon.png", elem_id="company-logo", label="Beiing Human")
    gr.Markdown("## πŸ“Š ERP vs Bank/Vendor Reconciliation Tool")

    with gr.Row():
        erp_file = gr.File(
            label="πŸ“ Upload ERP Statement (CSV only)",
            type="filepath",
            file_types=[".csv"],
        )
        external_file = gr.File(
            label="🏦 Upload Bank or Vendor Statement (CSV only)",
            type="filepath",
            file_types=[".csv"],
        )

    btn = gr.Button("πŸ” Reconcile")
    status = gr.Markdown()
    result = gr.HTML()
    download = gr.File(
        label="⬇️ Download Reconciliation Report",
        type="filepath",
    )

    # Inputs and outputs are listed in the order the callback expects them
    # and yields them, respectively.
    btn.click(
        reconcile_statements_openai,
        [erp_file, external_file],
        [status, result, download],
    )

# Start the web server for the app.
iface.launch()