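# app.py -- Gradio front end for the AgentX Space.
# An assistant built on the OpenAI Agents SDK with tools for web search, news,
# straight-line distances, code interpretation, and image generation.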
from operator import indexOf
import os
import gradio as gr
import asyncio
from dotenv import load_dotenv
from pathlib import Path
from time import sleep
from glob import glob
import copy
import base64
import json
from PIL import Image
from io import BytesIO
import math
from openai.types import Reasoning
import requests
from urllib.parse import quote
from multiprocessing import context
# from agents.tool import WebSearchTool
from typing_extensions import TypedDict, Any
from agents import Agent, FunctionTool, ImageGenerationTool, RunContextWrapper, Runner, function_tool, CodeInterpreterTool, ModelSettings
from openai import OpenAI
from markdown_pdf import MarkdownPdf
from markdown_pdf import Section
from docx import Document
import brave
import geo_distance
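# Credentials and API keys come from the environment (a local .env is honored via load_dotenv).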
load_dotenv(override=True)
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
users = os.getenv('LOGNAME')
unames = users.split(',')
pwds = os.getenv('PASSWORD')
pwd_list = pwds.split(',')
# DEEPSEEK_KEY=os.getenv('DEEPSEEK_KEY')
# GROQ_KEY=os.getenv('GROQ_KEY')
BRAVE_KEY=os.getenv('BRAVE_KEY')
# BRAVE_SEARCH_KEY=os.getenv('BRAVE_SEARCH_KEY')
LOCATIONID_KEY=os.getenv('LOCATIONID_KEY')
# site = os.getenv('SITE')
# if site == 'local':
# dp = Path('./data')
# dp.mkdir(exist_ok=True)
# dataDir = './data/'
# else:
# dp = Path('/data')
# dp.mkdir(exist_ok=True)
# dataDir = '/data/'
# mimes = {'pdf': 'application/pdf', 'txt': 'text/plain',
# 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'}
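# ---- Tools exposed to the agent via the @function_tool decorator ----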
@function_tool
async def make_pdf(text: str, title: str) -> str:
    '''Creates a pdf document based on input markdown text and title string.
    Args:
        text: The markdown text to be processed to create the pdf document.
        title: A title that will be used as part of a filename in a file path.
    Returns:
        The file path to the PDF that was created and saved.
    '''
    for file in glob('./document.pdf'):
        os.remove(file)
    pdf = MarkdownPdf()
    pdf.add_section(Section(text))
    outpath = os.path.join('./', 'document.pdf')
    pdf.save(outpath)
    return outpath
@function_tool
async def get_news(query: str, window: str) -> str:
    '''Searches internet news sources for news related to the query, within
    a time window defined by the window argument.
    Args:
        query: The topic about which news is desired.
        window: A string indicating the time window over which news occurs; must be 'day', 'week', 'month' or 'year'.
    '''
    periods = {"day": "pd", "week": "pw", "month": "pm", "year": "py"}
    window = window.casefold()
    period = 'pw'
    if window in periods.keys():
        period = periods[window]
    news = brave.get_brave_news(query, BRAVE_KEY, period)
    return news
@function_tool
async def search_web(query: str) -> str:
    '''Searches the internet for information related to the query.
    Args:
        query: The topics to be searched on.
    '''
    result = brave.get_brave_search_results(query, BRAVE_KEY)
    return result
@function_tool
async def get_distance(addr1: str, addr2: str) -> float:
    '''Compute the great-circle distance in miles between two addresses or other location specifiers.
    Args:
        addr1: The first address or location specifier.
        addr2: The second address or location specifier.
    Returns:
        Distance in miles
    '''
    (lat1, lon1) = geo_distance.get_geo_coords(addr1, LOCATIONID_KEY)
    (lat2, lon2) = geo_distance.get_geo_coords(addr2, LOCATIONID_KEY)
    distance = geo_distance.great_circle_distance_miles(lat1, lon1, lat2, lon2)
    return distance
def extract_text_from_docx(file_path):
    doc = Document(file_path)
    text = []
    for paragraph in doc.paragraphs:
        text.append(paragraph.text)
    return "\n".join(text)
def md(txt):
    return str(txt).replace('```', ' ').replace(' ', '&nbsp;&nbsp;').replace(' ', '&nbsp;&nbsp;').replace(' ', '&nbsp;&nbsp;').replace('\n','<br>').replace('~~','~')
# def clean_history(db_path: str):
# connection = sqlite3.connect(db_path)
# cursor = connection.cursor()
# # Query to fetch all table names
# cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
# tables = cursor.fetchall()
# # Drop each table
# for table_name in tables:
# if table_name != 'sqlite_sequence':
# cursor.execute(f"DELETE FROM {table_name[0]};")
# # Commit changes and close the connection
# connection.commit()
# connection.close()
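# Login helpers: when the password box loses focus, updatePassword validates the
# user/password pair and enables the upload buttons; when the user box loses focus,
# update_user normalizes the entered name against the configured user list.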
def updatePassword(pwd, user):
    password = 'none'
    if user in unames:
        pwd = pwd.lower().strip()
        if pwd == pwd_list[unames.index(user)]:
            password = pwd
            return [password, "*********",
                    gr.Button(value='Upload Input File', interactive=True),
                    gr.Button(value='Upload Image to Analyze', interactive=True)]
        else:
            return [password, "invalid password",
                    gr.Button(value='Upload Input File', interactive=False),
                    gr.Button(value='Upload Image to Analyze', interactive=False)]
    else:
        return [password, "invalid user",
                gr.Button(value='Upload Input File', interactive=False),
                gr.Button(value='Upload Image to Analyze', interactive=False)]
def update_user(user_win):
    user_win = user_win.lower().strip()
    user = 'unknown'
    for s in unames:
        if user_win == s:
            user = s
            break
    return [user, user]
def credentials_ok(user, pwd):
    return user in unames and pwd in pwd_list
def clean_up_files():
    for file in glob('./document.*'):
        try:
            os.remove(file)
        except:
            pass
    for file in glob('./*.png'):
        try:
            os.remove(file)
        except:
            pass
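# Base64-encode an uploaded image and keep it on disk until the next prompt is submitted.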
def load_image(image, user, output_window, uploaded_image_files):
    # status = 'OK, image is ready! Enter prompt and tap submit button'
    try:
        with open(image, 'rb') as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        fpath = f'{user}_image{len(uploaded_image_files)}.b64'
        with open(fpath, 'wt') as fp:
            fp.write(base64_image)
        output_window += md(f'\nImage {os.path.basename(image)} loaded\n')
        uploaded_image_files.append(fpath)
    except:
        output_window = 'Unable to upload image'
    return [uploaded_image_files, output_window]
def upload_image(user, password, output_window):
    if not credentials_ok(user, password):
        return [gr.Image(visible=False, interactive=True), "Incorrect user name and/or password"]
    return [gr.Image(visible=True, interactive=True, value=None), output_window]
def upload_file(user, password, output_window):
    if not credentials_ok(user, password):
        return [gr.File(visible=False, label='Upload File'), 'Incorrect user and/or password']
    return [gr.File(visible=True, label='Upload File', value=None), output_window]
def load_file(file_uploader, output_window, uploaded_file_paths):
    path = file_uploader
    fname = os.path.basename(path)
    uploaded_file_paths.append(path)
    return [uploaded_file_paths, output_window + f'<br>{fname} loaded<br>',
            gr.File(visible=False, label='Upload File', type='filepath', value=None)]
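# Thin wrappers around the OpenAI Containers REST endpoints; the code-interpreter tool
# writes its output files into the container created for each chat turn, and these
# helpers list and fetch those files so they can be offered for download.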
def create_openai_container(name):
    url = 'https://api.openai.com/v1/containers'
    headers = {"Authorization": "Bearer " + OPENAI_API_KEY, "Content-Type": "application/json"}
    json_data = {"name": name}
    response = requests.post(
        url,
        headers=headers,
        json=json_data
    )
    return json.loads(response.content)["id"]
def get_openai_file(file_id, container_id):
    url = f'https://api.openai.com/v1/containers/{container_id}/files/{file_id}/content'
    headers = {"Authorization": "Bearer " + OPENAI_API_KEY}
    response = requests.get(
        url,
        headers=headers
    )
    return response
def list_openai_container_files(container_id):
    url = f'https://api.openai.com/v1/containers/{container_id}/files'
    headers = {"Authorization": "Bearer " + OPENAI_API_KEY}
    response = requests.get(
        url,
        headers=headers
    )
    return response
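# Main submit handler: builds the agent, folds any uploaded files/images into the
# input list, runs the agent, renders the reply plus any reasoning summary, and
# surfaces any generated image or container file for download.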
async def chat(prompt_window, user_window, password, history, output_window,
               uploaded_image_files, uploaded_file_paths, prior_inputs):
    file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
    image_window = gr.Image(visible=False, value=None)
    if not credentials_ok(user_window, password):
        return ['Invalid Credentials', prompt_window, uploaded_image_files,
                image_window, file_download, history, uploaded_file_paths, prior_inputs]
    instructions = '''
    You are a helpful assistant.
    You can call tools to compute straight-line distances and to search the web for
    either information or news. When you search for news you need to specify a period
    of either 'day', 'week', 'month' or 'year'. You also have a tool to create PDF
    documents and it requires markdown text as input. If a distance is requested use
    straight-line distance by default, and when possible use street addresses for locations.
    '''
    code_container = create_openai_container('my_container')
    agent = Agent(name="Assistant",
                  instructions=instructions,
                  model_settings=ModelSettings(reasoning=Reasoning(effort='low', summary='detailed')),
                  tools=[get_distance, search_web, get_news,
                         CodeInterpreterTool(tool_config={"type": "code_interpreter", "container": code_container}),  # make_pdf,
                         ImageGenerationTool(tool_config={"type": "image_generation", "quality": "low"})])
    response = output_window
    if not response:
        response = ''
    prompt = prompt_window
    # inputs = history.copy()
    inputs = prior_inputs
    file_input = ''
    if len(uploaded_file_paths) > 0:
        for uploaded_file_path in uploaded_file_paths:
            ext = uploaded_file_path.casefold().split('.')[-1]
            if ext == 'pdf':
                client = OpenAI(api_key=OPENAI_API_KEY)
                file = client.files.create(file=open(f'{uploaded_file_path}', 'rb'),
                                           purpose='user_data',
                                           expires_after={"seconds": 3600, "anchor": "created_at"})
                file_input = {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_file",
                            "file_id": file.id,
                        }
                    ]
                }
                inputs.append(file_input)
            if ext in ['docx', 'txt', 'py']:
                if ext == 'docx':
                    extracted_text = extract_text_from_docx(uploaded_file_path)
                else:
                    with open(uploaded_file_path, 'rt') as fp:
                        extracted_text = fp.read()
                file_input = {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_text",
                            "text": f"{extracted_text}",
                        }
                    ]
                }
                inputs.append(file_input)
        uploaded_file_paths = []
    image_input = ''
    if len(uploaded_image_files) > 0:
        for file in uploaded_image_files:
            with open(file, 'rt') as fp:
                b64data = fp.read()
            os.remove(file)
            image_input = {
                "role": "user",
                "content": [
                    {
                        "type": "input_image",
                        "image_url": f'data:image/jpeg;base64, {b64data}',
                    }
                ]
            }
            inputs.append(image_input)
        uploaded_image_files = []
    history.append({"role": "user", "content": prompt})
    inputs.append({"role": "user", "content": prompt})
    exception_msg = ''
    result = None
    reasoning = ''
    try:
        result = await Runner.run(agent, max_turns=20,
                                  input=inputs)
        for item in result.new_items:
            if (
                item.type == "tool_call_item"
                and item.raw_item.type == "image_generation_call"
                and (img_result := item.raw_item.result)
            ):
                image_out_path = f'{user_window}_out.png'
                with open(image_out_path, 'wb') as fp:
                    fp.write(base64.b64decode(img_result))
                image_window = gr.Image(visible=True, value=image_out_path)
            if item.type == "reasoning_item":
                for o in item.raw_item.summary:
                    if o.type == "summary_text":
                        reasoning += ('\n' + o.text + '\n')
        # for raw in result.raw_responses:
        #     for item in raw.output:
        #         if item.type == 'reasoning' and item.summary:
        #             for o in item.summary:
        #                 reasoning += ('\nm1: ' + o.text + '\n')
        reply = md(result.final_output)
        response += "\n\n***YOU***: " + prompt + "\n\n***GPT***: " + reply.replace('```', '\n\n```\n\n')
        if reasoning != '':
            response += '\n\n**Reasoning:**\n\n' + reasoning + '\n'
        history.append({"role": "assistant", "content": result.final_output})
    except Exception as e:
        exception_msg = f'Error: {e}'  # use str(e); not all exceptions define a .message attribute
        response += "\n\n***YOU***: " + prompt + "\n\n***GPT***: " + exception_msg
    new_inputs = inputs  # fall back to the raw inputs if the run failed
    if result:
        new_inputs = result.to_input_list()
        usage = result.context_wrapper.usage
        response += f"\nTotal tokens: {usage.total_tokens}"
        loc = result.final_output.find('/mnt/data/')
        if loc > -1:
            ts = result.final_output[loc + 10:]
            loc2 = ts.find(')')
            if loc2 > -1:
                fname = ts[:loc2]
                container_list = list_openai_container_files(code_container)
                file_list_json = json.loads(container_list.content)
                latest_file_time = 0
                download_file_id = None
                download_ext = ''
                for item in file_list_json['data']:
                    if fname in item["path"]:
                        file_time = item["created_at"]
                        if file_time > latest_file_time:
                            latest_file_time = file_time
                            download_file_id = item["id"]
                            download_ext = fname.split('.')[-1].casefold()
                if download_file_id:
                    fdata = get_openai_file(download_file_id, code_container).content
                    with open(f'./document.{download_ext}', 'wb') as fp:
                        fp.write(fdata)
                    file_download = gr.DownloadButton(label=f'Download {download_ext.upper()} Doc',
                                                      visible=True, value=f'./document.{download_ext}')
    return [response, '', uploaded_image_files, image_window, file_download, history,
            uploaded_file_paths, new_inputs]
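# Help text rendered into the dialog window by the "Help & Hints" button.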
def show_help():
    txt = '''
    This is an agent using the OpenAI Python Agents SDK.
    It has tools to:
    * Search the web
    * Compute straight-line distances between locations
    * Analyze images you upload
    * Create and display images you describe, which you can download
    * Use uploaded images and documents as context (.txt, .pdf, .docx, .py)
    * Get news from the web
    * Make PDFs, Word documents and Excel spreadsheets based on results it generated
    Agents perform multiple steps, using tools as necessary, to satisfy a single request.
    1. General:
        1.1 Login with user name and password (not case-sensitive)
        1.2 Type prompts (questions, instructions) into the "Prompt or Question" window.
    2. Chat:
        2.1 Upload any image(s) and/or documents (files) you want the agent to consider, using
            the "Upload Image to Analyze" and "Upload Input File" buttons.
        2.2 Enter a prompt/question and tap the "Submit Prompt/Question" button. The responses appear
            in the Dialog window.
        2.3 Continue your session by optionally uploading more files and/or images and entering a
            new prompt/question. The agent remembers past inputs and responses until you tap
            the "Start New Session" button.
        2.4 If the topic changes or when done chatting, tap the "Start New Session" button.
    3. Make Image:
        3.1 Include a description of the desired image in the prompt window. If desired, uploaded
            images and files can also be used.
        3.2 Tap the "Submit Prompt/Question" button. This can take a few seconds.
        3.3 There is a download button on the image display if your system supports file downloads.
        3.4 When done viewing the image, tap the "Start New Session" button.
    Hints:
        Better results are obtained by including detailed descriptions and instructions
        of what you want in the prompt.
        Start a new session whenever memory of previous inputs and responses is no longer
        needed as context. The agent can only remember so much.
    '''
    return str(txt).replace('```', ' ').replace(' ', '&nbsp;&nbsp;').replace(' ', '&nbsp;&nbsp;').replace(' ', '&nbsp;&nbsp;').replace('\n','<br>')
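# "Start New Session" handler: clears the history, images, download button, and prior inputs.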
def new_session(user_window, history):
    history = []
    return [prompt_window, history, 'Session cleared',
            gr.Image(visible=False, value=None),
            gr.Image(visible=False, value=None), [],
            gr.DownloadButton(label='Download File', visible=False, value=None),
            gr.File(visible=False, label='Upload File', type='filepath'), []]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    password = gr.State("")
    user = gr.State("unknown")
    uploaded_image_files = gr.State([])
    uploaded_file_paths = gr.State([])
    history = gr.State([])
    inputs = gr.State([])
    gr.Markdown('# GPT Agent')
    gr.Markdown('Enter user name & password. Tap "Help & Hints" button for more instructions.')
    with gr.Row():
        user_window = gr.Textbox(label="User Name")
        user_window.blur(fn=update_user, inputs=user_window, outputs=[user, user_window])
        pwd_window = gr.Textbox(label="Password")
        help_button = gr.Button(value='Help & Hints')
    with gr.Row():
        clear_button = gr.Button(value="Start New Session")
        button_upload_file = gr.Button(value='Upload Input File', interactive=False)
        button_get_image = gr.Button(value='Upload Image to Analyze', interactive=False)
        submit_button = gr.Button(value="Submit Prompt/Question")
    with gr.Row():
        prompt_window = gr.Textbox(label="Prompt or Question", scale=7)
    gr.Markdown('### **Dialog:**')
    # output_window = gr.Text(container=True, label='Dialog')
    output_window = gr.Markdown(container=True)
    file_download = gr.DownloadButton(label='Download File', visible=False, value=None)
    with gr.Row():
        with gr.Column():
            image_window2 = gr.Image(visible=False, interactive=True, label='Image to Analyze',
                                     type='filepath')
        with gr.Column():
            image_window = gr.Image(visible=False, label='Generated Image')
    with gr.Row():
        file_uploader = gr.File(visible=False, label='Upload File', type='filepath')
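    # Wire the UI events to their handlers.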
    submit_button.click(chat,
                        inputs=[prompt_window, user_window, password, history, output_window,
                                uploaded_image_files, uploaded_file_paths, inputs],
                        outputs=[output_window, prompt_window, uploaded_image_files,
                                 image_window, file_download, history, uploaded_file_paths, inputs])
    clear_button.click(fn=new_session, inputs=[user_window, history],
                       outputs=[prompt_window, history, output_window,
                                image_window, image_window2,
                                uploaded_image_files, file_download, file_uploader, inputs])
    help_button.click(fn=show_help, outputs=output_window)
    button_get_image.click(fn=upload_image, inputs=[user, password, output_window],
                           outputs=[image_window2, output_window])
    image_window2.upload(fn=load_image,
                         inputs=[image_window2, user, output_window, uploaded_image_files],
                         outputs=[uploaded_image_files, output_window])
    pwd_window.blur(updatePassword,
                    inputs=[pwd_window, user],
                    outputs=[password, pwd_window, button_upload_file, button_get_image])
    button_upload_file.click(fn=upload_file, inputs=[user, password, output_window],
                             outputs=[file_uploader, output_window])
    file_uploader.upload(fn=load_file, inputs=[file_uploader, output_window, uploaded_file_paths],
                         outputs=[uploaded_file_paths, output_window, file_uploader])
    # demo.launch(share=True, allowed_paths=[dataDir], ssr_mode=False)
    # demo.load(delete_db_files)
    demo.unload(clean_up_files)
demo.launch(share=True, ssr_mode=False)