fixes Python: DevUI fails when uploading Pdf file (tested on Python Foundry Agent) (#2675)

Fixes #2652
2026-06-16 21:04:09 +08:00 · 2025-12-08 09:00:57 -08:00
parent 551219cb55
commit 0675000f4b
10 changed files with 417 additions and 132 deletions
@@ -0,0 +1,15 @@
+# Azure OpenAI Responses API Configuration
+# The Responses API supports PDF uploads, images, and other multimodal content.
+# Requires api-version 2025-03-01-preview or later.
+
+# Option 1: Use API key authentication
+AZURE_OPENAI_API_KEY=your-azure-openai-api-key-here
+
+# Option 2: Use Azure CLI authentication (run 'az login' first)
+# No API key needed - comment out or remove the AZURE_OPENAI_API_KEY line above
+
+# Required: Azure OpenAI endpoint with Responses API support
+AZURE_OPENAI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
+
+# Required: Deployment name (must support Responses API)
+AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=gpt-4.1-mini
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft. All rights reserved.
+"""Azure Responses Agent sample for DevUI."""
+
+from .agent import agent
+
+__all__ = ["agent"]
@@ -0,0 +1,123 @@
+# Copyright (c) Microsoft. All rights reserved.
+"""Sample agent using Azure OpenAI Responses API for Agent Framework DevUI.
+
+This agent uses the Responses API which supports:
+- PDF file uploads
+- Image uploads
+- Audio inputs
+- And other multimodal content
+
+The Chat Completions API (AzureOpenAIChatClient) does NOT support PDF uploads.
+Use this agent when you need to process documents or other file types.
+
+Required environment variables:
+- AZURE_OPENAI_ENDPOINT: Your Azure OpenAI endpoint
+  (AZURE_OPENAI_RESPONSES_ENDPOINT is used instead when it is set)
+- AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: Deployment name for Responses API
+  (falls back to AZURE_OPENAI_CHAT_DEPLOYMENT_NAME if not set)
+- AZURE_OPENAI_API_KEY: Your API key (or use Azure CLI auth)
+"""
+
+import logging
+import os
+from typing import Annotated
+
+from agent_framework import ChatAgent, ai_function
+from agent_framework.azure import AzureOpenAIResponsesClient
+
+logger = logging.getLogger(__name__)
+
+# Resolve the deployment name: prefer the Responses-specific variable, then the
+# chat deployment variable. Chaining with `or` (instead of nesting defaults)
+# means a variable that is set but empty also falls through to the next
+# candidate. The final "" preserves the previous value when nothing is set.
+_deployment_name = (
+    os.environ.get("AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME")
+    or os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
+    or ""
+)
+
+# Resolve the endpoint the same way: Responses-specific variable first, then
+# the general Azure OpenAI endpoint, then "".
+_endpoint = (
+    os.environ.get("AZURE_OPENAI_RESPONSES_ENDPOINT")
+    or os.environ.get("AZURE_OPENAI_ENDPOINT")
+    or ""
+)
+
+
+# Decorated like the other tool functions in this module so that all three
+# tool definitions are declared the same way.
+@ai_function
+def analyze_content(
+    query: Annotated[str, "What to analyze or extract from the uploaded content"],
+) -> str:
+    """Analyze uploaded content based on the user's query.
+
+    This is a placeholder - the actual analysis is done by the model
+    when processing the uploaded files.
+    """
+    # Placeholder result: only echoes the query back; no file content is read here.
+    return f"Analyzing content for: {query}"
+
+
+@ai_function
+def summarize_document(
+    length: Annotated[str, "Desired summary length: 'brief', 'medium', or 'detailed'"] = "medium",
+) -> str:
+    """Generate a summary of the uploaded document."""
+    # Placeholder result: reports the requested length; no document is read here.
+    status_message = f"Generating {length} summary of the document..."
+    return status_message
+
+
+@ai_function
+def extract_key_points(
+    max_points: Annotated[int, "Maximum number of key points to extract"] = 5,
+) -> str:
+    """Extract key points from the uploaded document."""
+    # Placeholder result: reports the requested cap; no document is read here.
+    status_message = f"Extracting up to {max_points} key points..."
+    return status_message
+
+
+# Agent using Azure OpenAI Responses API (supports PDF uploads!)
+# Built at import time from the module-level _deployment_name and _endpoint
+# values, so the environment must be configured before this module is imported.
+agent = ChatAgent(
+    name="AzureResponsesAgent",
+    description="An agent that can analyze PDFs, images, and other documents using Azure OpenAI Responses API",
+    instructions="""
+    You are a helpful document analysis assistant. You can:
+
+    1. Analyze uploaded PDF documents and extract information
+    2. Summarize document contents
+    3. Answer questions about uploaded files
+    4. Extract key points and insights
+
+    When a user uploads a file, carefully analyze its contents and provide
+    helpful, accurate information based on what you find.
+
+    For PDFs, you can read and understand the text, tables, and structure.
+    For images, you can describe what you see and extract any text.
+    """,
+    # No credential is passed explicitly here.
+    # NOTE(review): presumably the client resolves AZURE_OPENAI_API_KEY or
+    # Azure CLI credentials on its own - confirm against the client docs.
+    chat_client=AzureOpenAIResponsesClient(
+        deployment_name=_deployment_name,
+        endpoint=_endpoint,
+        api_version="2025-03-01-preview",  # Required for Responses API
+    ),
+    # Only these two tools are registered; analyze_content is defined in this
+    # module but is not passed to the agent.
+    tools=[summarize_document, extract_key_points],
+)
+
+
+def main():
+    """Log a startup banner, then serve the agent through DevUI on port 8090."""
+    # Imported here rather than at module level, so importing this module does
+    # not itself import agent_framework_devui.
+    from agent_framework_devui import serve
+
+    logging.basicConfig(level=logging.INFO, format="%(message)s")
+
+    rule = "=" * 60
+    startup_banner = (
+        rule,
+        "Starting Azure Responses Agent",
+        rule,
+        "",
+        "This agent uses the Azure OpenAI Responses API which supports:",
+        "  - PDF file uploads",
+        "  - Image uploads",
+        "  - Audio inputs",
+        "",
+        "Try uploading a PDF and asking questions about it!",
+        "",
+        "Required environment variables:",
+        "  - AZURE_OPENAI_ENDPOINT",
+        "  - AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME",
+        "  - AZURE_OPENAI_API_KEY (or use Azure CLI auth)",
+        "",
+    )
+    # One log record per banner line, in order.
+    for banner_line in startup_banner:
+        logger.info(banner_line)
+
+    serve(entities=[agent], port=8090, auto_open=True)
+
+
+if __name__ == "__main__":
+    main()