Spaces:

rishisriv-bh
/

Statements-Reconciliation

Sleeping

App Files Files Community

Statements-Reconciliation / app_chatgpt.py

rishisriv-bh

Update app_chatgpt.py

16f1089 verified about 2 months ago

raw

history blame contribute delete

10.9 kB

	import openai
	import pandas as pd
	import os
	import gradio as gr
	import boto3
	import uuid
	import logging
	import sys
	from datetime import datetime

	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	handlers=[
	logging.StreamHandler(sys.stdout), # This will show in HF Spaces logs
	logging.FileHandler('app.log', mode='a') # Also save to file
	]
	)

	logger = logging.getLogger(__name__)

	# Log application startup
	logger.info("🚀 Starting ERP Reconciliation Tool")
	logger.info(f"📅 Application started at: {datetime.now()}")


	try:
	openai.api_key = os.environ.get("Open_ai_key")
	bucket_name = os.environ.get("Bucket_name")

	if not openai.api_key:
	logger.warning("⚠️ OpenAI API key not found in environment variables")
	else:
	logger.info("✅ OpenAI API key loaded successfully")

	if not bucket_name:
	logger.warning("⚠️ S3 bucket name not found in environment variables")
	else:
	logger.info(f"✅ S3 bucket configured: {bucket_name}")

	except Exception as e:
	logger.error(f"❌ Error loading environment variables: {e}")

	try:
	session = boto3.Session(
	aws_access_key_id=os.getenv("access_key"),
	aws_secret_access_key=os.getenv("secret_access_key")
	)
	logger.info("✅ AWS session created successfully")
	except Exception as e:
	logger.error(f"❌ Error creating AWS session: {e}")


	def upload_files_to_s3(erp_file_path, external_file_path, content, bucket_name):
	logger.info(f"📤 Starting S3 upload process")

	try:
	s3 = session.client("s3")
	session_id = str(uuid.uuid4())

	logger.info(f"🆔 Generated session ID: {session_id}")

	base_erp = os.path.splitext(os.path.basename(erp_file_path))[0]
	base_bank = os.path.splitext(os.path.basename(external_file_path))[0]

	# Add session ID to filenames
	erp_filename = f"{base_erp}_{session_id}.csv"
	bank_filename = f"{base_bank}_{session_id}.csv"
	txt_filename = f"{session_id}.txt"

	logger.info(f"📄 Generated filenames - ERP: {erp_filename}, Bank: {bank_filename}, Result: {txt_filename}")

	# Write reconciliation output to local file
	with open(txt_filename, "w", encoding="utf-8") as file:
	file.write(content)
	logger.info(f"✅ Reconciliation result written to local file: {txt_filename}")

	# === Separate folders by type ===
	erp_s3_key = f"ERP Statements/{erp_filename}"
	bank_s3_key = f"Bank Statements/{bank_filename}"
	txt_s3_key = f"Reconciliation Results/{txt_filename}"

	# === Combined folder for each run ===
	combined_prefix = f"Combined Files/{session_id}/"
	erp_combined_key = combined_prefix + erp_filename
	bank_combined_key = combined_prefix + bank_filename
	txt_combined_key = combined_prefix + txt_filename

	# ✅ Upload to type-based folders
	logger.info("📤 Uploading files to type-based S3 folders...")
	s3.upload_file(erp_file_path, bucket_name, erp_s3_key)
	logger.info(f"✅ Uploaded ERP file to: {erp_s3_key}")

	s3.upload_file(external_file_path, bucket_name, bank_s3_key)
	logger.info(f"✅ Uploaded Bank file to: {bank_s3_key}")

	s3.upload_file(txt_filename, bucket_name, txt_s3_key)
	logger.info(f"✅ Uploaded result file to: {txt_s3_key}")

	# ✅ Upload to combined folder
	logger.info("📤 Uploading files to combined S3 folder...")
	s3.upload_file(erp_file_path, bucket_name, erp_combined_key)
	s3.upload_file(external_file_path, bucket_name, bank_combined_key)
	s3.upload_file(txt_filename, bucket_name, txt_combined_key)
	logger.info(f"✅ All files uploaded to combined folder: {combined_prefix}")

	# Clean up local file
	os.remove(txt_filename)
	logger.info(f"🗑️ Cleaned up local file: {txt_filename}")

	upload_result = {
	"erp_s3_key": erp_s3_key,
	"bank_s3_key": bank_s3_key,
	"result_s3_key": txt_s3_key,
	"combined_keys": [erp_combined_key, bank_combined_key, txt_combined_key],
	"session_id": session_id
	}

	logger.info(f"✅ S3 upload process completed successfully for session: {session_id}")
	return upload_result

	except Exception as e:
	logger.error(f"❌ Error in S3 upload process: {e}")
	raise e


	# ✅ Extract transactions from CSV only
	def extract_transactions(file):
	logger.info(f"📊 Processing file: {file.name}")

	filename = file.name.lower()
	if not filename.endswith(".csv"):
	error_msg = f"Unsupported file format: {filename}. Please upload a CSV file only."
	logger.error(f"❌ {error_msg}")
	raise ValueError(error_msg)

	try:
	df = pd.read_csv(file.name)
	logger.info(f"✅ Successfully read CSV file: {file.name}")
	logger.info(f"📈 File contains {len(df)} rows and {len(df.columns)} columns")
	logger.info(f"🏷️ Column names: {list(df.columns)}")

	return df.to_string(index=False)
	except Exception as e:
	error_msg = f"Error processing {file.name}: {e}"
	logger.error(f"❌ {error_msg}")
	raise ValueError(error_msg)


	# ✅ Reconcile the statements using OpenAI
	def reconcile_statements_openai(erp_file, external_file):
	session_start = datetime.now()
	logger.info(f"🔍 Starting reconciliation process at: {session_start}")

	# Validate inputs
	if not erp_file or not external_file:
	error_msg = "Both ERP and External files are required"
	logger.error(f"❌ {error_msg}")
	yield "❌ Error: Missing files", f"<h3>Error</h3><pre>{error_msg}</pre>", None
	return

	logger.info(f"📁 ERP file: {erp_file.name if erp_file else 'None'}")
	logger.info(f"🏦 External file: {external_file.name if external_file else 'None'}")

	yield "⏳ Processing your request...", "", None

	try:
	# Extract data from files
	logger.info("📊 Extracting data from ERP file...")
	erp_data = extract_transactions(erp_file)

	logger.info("📊 Extracting data from External file...")
	external_data = extract_transactions(external_file)

	prompt = f"""
	You are a financial analyst specializing in account reconciliations. Your task is to compare two data sets: one from an ERP system and the other from a
	Bank or Vendor statement.
	The goal is to identify which transactions match across both data sets, and which transactions are unmatched or potentially erroneous.
	Each dataset contains transaction entries with Date, Amount, and Description.
	ERP descriptions may include prefixes like "Vendor Payment - ", while external descriptions are simpler.
	Please attempt to normalize and fuzzy-match transactions by:
	- Ignoring common prefixes/suffixes
	- Allowing for small amount rounding differences (±$0.01–$1)
	- Matching based on partial vendor or keyword overlaps
	.
	---
	Please follow this format in your response:
	1. 📘 Introduction
	Briefly explain what reconciliation means and how you'll approach it.
	2. ✅ Matched Transactions
	Make a table to show side by side comparison of matched transactions
	3. 🏦 Unmatched Transactions
	List any transactions found in
	a. External file but not in the ERP file OR
	b. ERP file but Not in External file
	4. 🧾 Summary & Suggested Next Steps
	Explain what the discrepancies might mean and what the user should do next.
	---
	Here is the ERP data:
	{erp_data}
	---
	Here is the External (Bank or Vendor) data:
	{external_data}
	"""

	logger.info("🤖 Sending request to OpenAI...")
	logger.info(f"📝 Prompt length: {len(prompt)} characters")

	response = openai.ChatCompletion.create(
	model="gpt-5",
	messages=[
	{"role": "system",
	"content": "You are a financial analyst who specializes in reconciling financial data."},
	{"role": "user", "content": prompt}
	],
	temperature=1,
	)

	logger.info("✅ Received response from OpenAI")

	content = response.choices[0].message['content']
	logger.info(f"📄 Response length: {len(content)} characters")

	html = f"""
	<div style="font-family: 'Segoe UI', sans-serif; line-height: 1.5;">
	<h2>🔍 Reconciliation Report</h2>
	<pre>{content}</pre>
	</div>
	"""

	# Calculate processing time
	processing_time = (datetime.now() - session_start).total_seconds()
	logger.info(f"⏱️ Processing completed in {processing_time:.2f} seconds")
	download_filename = f"reconciliation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
	with open(download_filename, "w", encoding="utf-8") as f:
	f.write(content)

	yield "✅ Done!", html, download_filename

	# ✅ Upload the files + result to S3
	try:
	logger.info("📤 Starting S3 upload...")
	upload_result = upload_files_to_s3(
	erp_file_path=erp_file.name,
	external_file_path=external_file.name,
	content=html,
	bucket_name=bucket_name
	)
	logger.info(f"✅ Files uploaded successfully with session ID: {upload_result['session_id']}")
	except Exception as upload_error:
	logger.error(f"❌ S3 upload failed: {upload_error}")
	# Don't fail the entire process if S3 upload fails
	logger.info("⚠️ Continuing despite S3 upload failure")

	logger.info(f"🎉 Reconciliation process completed successfully!")

	except Exception as e:
	error_msg = f"Error during reconciliation: {e}"
	logger.error(f"❌ {error_msg}")
	logger.error(f"🐛 Full error details: {str(e)}")
	yield "❌ Error occurred", f"<h3>Error</h3><pre>{error_msg}</pre>", None


	# ✅ Gradio UI
	with gr.Blocks(css="""#company-logo { width: 25%; margin: auto; display: block; }""") as iface:
	gr.Image("logo_Icon.png", elem_id="company-logo", label="Beiing Human")
	gr.Markdown("## 📊 ERP vs Bank/Vendor Reconciliation Tool")

	with gr.Row():
	erp_file = gr.File(label="📁 Upload ERP Statement (CSV only)", type="filepath", file_types=[".csv"])
	external_file = gr.File(label="🏦 Upload Bank or Vendor Statement (CSV only)", type="filepath", file_types=[".csv"])

	btn = gr.Button("🔍 Reconcile")

	status = gr.Markdown()
	result = gr.HTML()
	download = gr.File(label="⬇️ Download Reconciliation Report", type="filepath")

	btn.click(
	fn=reconcile_statements_openai,
	inputs=[erp_file, external_file],
	outputs=[status, result, download]
	)
	iface.launch()