#
# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0
#

# Module-level configuration constants: model/endpoint settings, the system
# prompt, canned "reasoning" status texts for each tool stage, and value pools
# (platform strings, versions, domains, locales, ...) consumed elsewhere.

#OPENAI_API_BASE_URL  # Endpoint. Not here -> Hugging Face Spaces secrets
#OPENAI_API_KEY  # API Key. Not here -> Hugging Face Spaces secrets

MODEL = "gpt-4.1-nano"

SEARXNG_ENDPOINT = "https://searx.stream/search"  # See the endpoint list at https://searx.space
BAIDU_ENDPOINT = "https://www.baidu.com/s"
READER_ENDPOINT = "https://r.jina.ai/"
REQUEST_TIMEOUT = 300  # 5 minutes

# System prompt text.
# NOTE(review): "Core Principles" asks for at least 3 sources while workflow
# step 4 asks for at least 5 -- confirm which threshold is intended.
INSTRUCTIONS = """
You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.

Your absolute rules:
- You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
- You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.

Core Principles:
- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
- Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
- Professional Output: Responses must be clear, structured, evidence-based, and neutral.

Execution Workflow:
1. Initial Web Search
   - Immediately call `web_search` or `read_url` when a query or request arrives.
   - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.

2. Result Selection
   - Choose up to 10 of the most relevant, credible, and content-rich results.
   - Prioritize authoritative sources (academia, institutions, official publications, expert commentary).
   - Avoid low-credibility or single-source reliance.

3. Content Retrieval
   - For each selected URL, use `read_url`.
   - Extract facts, data, statistics, and relevant arguments.
   - Normalize terminology and remove redundancies.

4. Cross-Validation
   - Compare extracted data from at least 5 sources.
   - Identify agreements, contradictions, and missing pieces.
   - Validate all numerical, temporal, and factual claims.

5. Knowledge Integration
   - Synthesize findings into a structured hierarchy:
     - Overview → Key details → Evidence → Citations.
   - Highlight the latest developments and their implications.

6. Response Construction
   - Always cite sources using [Source Title](Source_URL).
   - Maintain professional, precise, and neutral tone.
   - Use headings, numbered lists, and bullet points for clarity.
   - Ensure readability for both experts and general readers.

7. Ambiguity & Uncertainty Handling
   - Explicitly mark incomplete, ambiguous, or conflicting data.
   - Provide possible interpretations and reasoned explanations.

8. Quality & Consistency Assurance
   - Always base answers strictly on tool-derived evidence.
   - Ensure logical flow, factual accuracy, and neutrality.
   - Never bypass tool execution for any query or request.

Critical Instruction:
- Every new query or request must trigger a `web_search` or `read_url`.
- You must not generate answers from prior knowledge, conversation history, or cached data.
- Always use Markdown format for URL sources with [Source Title](Source_URL).
- Ensure all Markdown links are properly formatted and clickable.
- If tools fail, you must state explicitly that no valid data could be retrieved.

Mandatory URL Citation Rules:
- Extract the actual title from each webpage or document you retrieve.
- Extract the complete URL exactly as provided by the tool response.
- Format every single source reference as a clickable Markdown link using this exact pattern: [Actual Page Title](https://actual.url.here).
- Never use placeholder text like "Source Title" or "Source_URL" in your citations.
- Never write URLs as plain text. Always wrap them in Markdown link format.
- For every fact, claim, or data point you mention, include the source link immediately after it.
- Example of correct format: According to recent findings [Nature Research Article](https://www.nature.com/articles/example123).
- Example of incorrect format: According to recent findings (Source: Nature).

Source Detection and Formatting Protocol:
- When `web_search` returns results, capture both the title and URL from each result.
- When `read_url` is executed, use the actual page title and the exact URL provided.
- Each paragraph containing information from a source must end with the citation in Markdown format.
- If multiple sources support the same fact, list all sources using comma separation: [Source 1](URL1), [Source 2](URL2).
- Never abbreviate or modify URLs. Copy them exactly as retrieved.
- Never use generic titles. Extract the actual page title from the content or metadata.
- Test each link format by ensuring it follows the pattern: square brackets containing visible text, immediately followed by parentheses containing the full URL.

Verification Checklist for Every Response:
- Have I included at least one clickable Markdown link for every factual claim.
- Are all URLs complete and starting with http or https.
- Do all links follow the exact format of [Descriptive Title](Full URL).
- Have I avoided any placeholder text in my citations.
- Can each link be clicked to access the original source.
\n\n\n
"""

# Status texts keyed by tool name, then by stage ("parsing", "executing",
# "completed", "error"). Each text carries str.format-style named
# placeholders: {query}/{engine} for web_search, {url} for read_url, plus
# {preview} on "completed" and {error} on "error".
# Line breaks inside the texts are written as "\n" escapes so that every
# literal stays on one physical line (a raw newline inside a "..." literal is
# a syntax error).
# NOTE(review): if the consumer renders these as HTML, confirm whether "<br>"
# was the intended separator instead of "\n".
# NOTE(review): both "error" texts promise a fallback to existing knowledge,
# which INSTRUCTIONS forbids ("If tools fail, you must state explicitly that
# no valid data could be retrieved") -- confirm the intended wording.
REASONING_STEPS = {
    "web_search": {
        "parsing": (
            "I need to search for information about: {query}\n\n"
            "I'm analyzing the user's request and preparing to execute a web search. "
            "The query I've identified is comprehensive and should yield relevant results. "
            "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.\n\n"
            "I'm now parsing the search parameters to ensure they are correctly formatted. "
            "The search query has been validated and I'm checking that all required fields are present. "
            "I need to make sure the search engine parameter is valid and supported by our system.\n\n"
            "I'm preparing the search request with the following configuration:\n"
            "- Search Query: {query}\n"
            "- Search Engine: {engine}\n\n"
            "I'm verifying that the network connection is stable and that the search service is accessible. "
            "All preliminary checks have been completed successfully."
        ),
        "executing": (
            "I'm now executing the web search for: {query}\n\n"
            "I'm connecting to the {engine} search service and sending the search request. "
            "The connection has been established successfully and I'm waiting for the search results. "
            "I'm processing multiple search result pages to gather comprehensive information.\n\n"
            "I'm analyzing the search results to identify the most relevant and authoritative sources. "
            "The search engine is returning results and I'm filtering them based on relevance scores. "
            "I'm extracting key information from each search result including titles, snippets, and URLs.\n\n"
            "I'm organizing the search results in order of relevance and checking for duplicate content. "
            "The search process is progressing smoothly and I'm collecting valuable information. "
            "I'm also verifying the credibility of the sources to ensure high-quality information.\n\n"
            "Current status: Processing search results...\n"
            "Results found: Multiple relevant sources identified\n"
            "Quality assessment: High relevance detected"
        ),
        "completed": (
            "I have successfully completed the web search for: {query}\n\n"
            "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
            "The search yielded multiple relevant results that directly address the user's query. "
            "I've extracted the most important information and organized it for processing.\n\n"
            "I've identified several high-quality sources with authoritative information. "
            "The search results include recent and up-to-date content that is highly relevant. "
            "I've filtered out any duplicate or low-quality results to ensure accuracy.\n\n"
            "I'm now processing the collected information to formulate a comprehensive response. "
            "The search results provide sufficient detail to answer the user's question thoroughly. "
            "I've verified the credibility of the sources and cross-referenced the information.\n\n"
            "Search Summary:\n"
            "- Total results processed: Multiple pages\n"
            "- Relevance score: High\n"
            "- Information quality: Verified and accurate\n"
            "- Sources: Authoritative and recent\n\n"
            "Preview of results:\n{preview}"
        ),
        "error": (
            "I encountered an issue while attempting to search for: {query}\n\n"
            "I tried to execute the web search but encountered an unexpected error. "
            "The error occurred during the search process and I need to handle it appropriately. "
            "I'm analyzing the error to understand what went wrong and how to proceed.\n\n"
            "Error details: {error}\n\n"
            "I'm attempting to diagnose the issue and considering alternative approaches. "
            "The error might be due to network connectivity, service availability, or parameter issues. "
            "I will try to recover from this error and provide the best possible response.\n\n"
            "I'm evaluating whether I can retry the search with modified parameters. "
            "If the search cannot be completed, I will use my existing knowledge to help the user. "
            "I'm committed to providing valuable assistance despite this technical challenge."
        ),
    },
    "read_url": {
        "parsing": (
            "I need to read and extract content from the URL: {url}\n\n"
            "I'm analyzing the URL structure to ensure it's valid and accessible. "
            "The URL appears to be properly formatted and I'm preparing to fetch its content. "
            "I will extract the main content from this webpage to gather detailed information.\n\n"
            "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
            "The domain seems legitimate and I'm preparing the request headers. "
            "I need to ensure that the website allows automated content extraction.\n\n"
            "I'm configuring the content extraction parameters:\n"
            "- Target URL: {url}\n"
            "- Extraction Method: Full content parsing\n"
            "- Content Type: HTML/Text\n"
            "- Encoding: Auto-detect\n\n"
            "I'm checking if the website requires any special handling or authentication. "
            "All preliminary validation checks have been completed successfully."
        ),
        "executing": (
            "I'm now accessing the URL: {url}\n\n"
            "I'm establishing a connection to the web server and sending the HTTP request. "
            "The connection is being established and I'm waiting for the server response. "
            "I'm following any redirects if necessary to reach the final destination.\n\n"
            "I'm downloading the webpage content and checking the response status code. "
            "The server is responding and I'm receiving the HTML content. "
            "I'm monitoring the download progress and ensuring data integrity.\n\n"
            "I'm parsing the HTML structure to extract the main content. "
            "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
            "I'm focusing on extracting the primary article or information content.\n\n"
            "Current status: Extracting content...\n"
            "Response received: Processing HTML\n"
            "Content extraction: In progress"
        ),
        "completed": (
            "I have successfully extracted content from: {url}\n\n"
            "I've retrieved the complete webpage content and processed it thoroughly. "
            "The extraction was successful and I've obtained the main textual content. "
            "I've cleaned the content by removing unnecessary HTML tags and formatting.\n\n"
            "I've identified the main article or information section of the webpage. "
            "The content has been properly parsed and structured for analysis. "
            "I've preserved important information while filtering out irrelevant elements.\n\n"
            "I'm now analyzing the extracted content to understand its context and relevance. "
            "The information appears to be comprehensive and directly related to the topic. "
            "I've verified that the content is complete and hasn't been truncated.\n\n"
            "Extraction Summary:\n"
            "- Content length: Substantial\n"
            "- Extraction quality: High\n"
            "- Content type: Article/Information\n"
            "- Processing status: Complete\n\n"
            "Preview of extracted content:\n{preview}"
        ),
        "error": (
            "I encountered an issue while trying to access: {url}\n\n"
            "I attempted to fetch the webpage content but encountered an error. "
            "The error prevented me from successfully extracting the information. "
            "I'm analyzing the error to understand the cause and find a solution.\n\n"
            "Error details: {error}\n\n"
            "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
            "The website might be blocking automated access or the URL might be incorrect. "
            "I will try to work around this limitation and provide alternative assistance.\n\n"
            "I'm evaluating whether I can access the content through alternative methods. "
            "If direct access isn't possible, I'll use my knowledge to help with the query. "
            "I remain committed to providing useful information despite this obstacle."
        ),
    },
}

REASONING_DEFAULT = "I'm processing the tool execution request..."

REASONING_DELAY = 0.15  # 150 ms

# Platform strings.
# NOTE(review): "Windows NT 11.0" does not look like a token real browsers
# send (Windows 11 is generally reported as "Windows NT 10.0") -- confirm it
# is intentional before relying on it.
OS = [
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Windows NT 11.0; Win64; x64",
    "Macintosh; Intel Mac OS X 11_6_2",
]

# Integers in 1..223 with gaps (6, 7, 9-11, 21, 22, 25-33, 127 and 169 are absent).
# NOTE(review): presumably candidate first octets for generated IPv4
# addresses -- verify against the consumer.
OCTETS = [
    1, 2, 3, 4, 5, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 24,
    34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
    68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
    85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
    114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
    141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
    154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166,
    167, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
    181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193,
    194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,
    207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
    220, 221, 222, 223,
]

BROWSERS = [
    "Chrome",
    "Firefox",
    "Safari",
    "Edge",
    "Opera",
]

CHROME_VERSIONS = [
    "120.0.0.0",
    "119.0.0.0",
    "118.0.0.0",
    "117.0.0.0",
    "116.0.0.0",
]

FIREFOX_VERSIONS = [
    "121.0",
    "120.0",
    "119.0",
    "118.0",
    "117.0",
]

SAFARI_VERSIONS = [
    "17.1",
    "17.0",
    "16.6",
    "16.5",
    "16.4",
]

EDGE_VERSIONS = [
    "120.0.2210.91",
    "119.0.2151.97",
    "118.0.2088.76",
    "117.0.2045.60",
    "116.0.1938.81",
]

DOMAINS = [
    "google.com", "bing.com", "yahoo.com", "duckduckgo.com",
    "baidu.com", "yandex.com", "facebook.com", "twitter.com",
    "linkedin.com", "reddit.com", "youtube.com", "wikipedia.org",
    "amazon.com", "github.com", "stackoverflow.com", "medium.com",
    "quora.com", "pinterest.com", "instagram.com", "tumblr.com",
]

PROTOCOLS = [
    "https://",
    "https://www.",
]

# URL prefixes ending in the query parameter; the search terms are presumably
# appended by the consumer -- verify against caller.
SEARCH_ENGINES = [
    "https://www.google.com/search?q=",
    "https://www.bing.com/search?q=",
    "https://search.yahoo.com/search?p=",
    "https://duckduckgo.com/?q=",
    "https://www.baidu.com/s?wd=",
    "https://yandex.com/search/?text=",
    "https://www.google.co.uk/search?q=",
    "https://www.google.ca/search?q=",
    "https://www.google.com.au/search?q=",
    "https://www.google.de/search?q=",
    "https://www.google.fr/search?q=",
    "https://www.google.co.jp/search?q=",
    "https://www.google.com.br/search?q=",
    "https://www.google.co.in/search?q=",
    "https://www.google.ru/search?q=",
    "https://www.google.it/search?q=",
]

KEYWORDS = [
    "news", "weather", "sports", "technology", "science",
    "health", "finance", "entertainment", "travel", "food",
    "education", "business", "politics", "culture", "history",
    "music", "movies", "games", "books", "art",
]

# Two-letter country codes.
COUNTRIES = [
    "US", "GB", "CA", "AU", "DE", "FR", "JP", "BR", "IN", "RU",
    "IT", "ES", "MX", "NL", "SE", "NO", "DK", "FI", "PL", "TR",
    "KR", "SG", "HK", "TW", "TH", "ID", "MY", "PH", "VN", "AR",
    "CL", "CO", "PE", "VE", "EG", "ZA", "NG", "KE", "MA", "DZ",
    "TN", "IL", "AE", "SA", "QA", "KW", "BH", "OM", "JO", "LB",
]

# Language-region tags.
LANGUAGES = [
    "en-US", "en-GB", "en-CA", "en-AU", "de-DE", "fr-FR", "ja-JP",
    "pt-BR", "hi-IN", "ru-RU", "it-IT", "es-ES", "es-MX", "nl-NL",
    "sv-SE", "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR", "ko-KR",
    "zh-CN", "zh-TW", "th-TH", "id-ID", "ms-MY", "fil-PH", "vi-VN",
    "es-AR", "es-CL", "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
    "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN", "he-IL", "ar-AE",
    "ar-SA", "ar-QA", "ar-KW", "ar-BH", "ar-OM", "ar-JO", "ar-LB",
]

# Time zone identifiers. "Asia/Kolkata" replaces the former "Asia/Mumbai",
# which is not an IANA tz database name.
TIMEZONES = [
    "America/New_York", "America/Chicago", "America/Los_Angeles",
    "America/Denver", "Europe/London", "Europe/Paris",
    "Europe/Berlin", "Europe/Moscow", "Asia/Tokyo",
    "Asia/Shanghai", "Asia/Hong_Kong", "Asia/Singapore",
    "Asia/Seoul", "Asia/Kolkata", "Asia/Dubai",
    "Australia/Sydney", "Australia/Melbourne", "America/Toronto",
    "America/Vancouver", "America/Mexico_City", "America/Sao_Paulo",
    "America/Buenos_Aires", "Africa/Cairo", "Africa/Johannesburg",
    "Africa/Lagos", "Africa/Nairobi", "Pacific/Auckland",
    "Pacific/Honolulu",
]

DESCRIPTION = """
SearchGPT is ChatGPT with real-time web search capabilities and the ability to read content directly from a URL.

This Space implements an agent-based system with Gradio. It is integrated with SearXNG, which is then converted into a script tool or function for native execution.

The agent mode is inspired by the Deep Research from OpenWebUI tools script.

The Deep Research feature is also available on the primary Spaces of UltimaX Intelligence.

Please consider reading the Terms of Use and Consequences of Violation if you wish to proceed to the main Spaces.

Like this project? Feel free to buy me a coffee.
"""  # Gradio