fixes Python: DevUI fails when uploading Pdf file (tested on Python Foundry Agent) (#2675)

Fixes #2652
This commit is contained in:
Victor Dibia
2025-12-08 09:00:57 -08:00
committed by GitHub
Unverified
parent 551219cb55
commit 0675000f4b
10 changed files with 417 additions and 132 deletions
@@ -0,0 +1,15 @@
# Azure OpenAI Responses API Configuration
# The Responses API supports PDF uploads, images, and other multimodal content.
# Requires api-version 2025-03-01-preview or later.
# Option 1: Use API key authentication
AZURE_OPENAI_API_KEY=your-azure-openai-api-key-here
# Option 2: Use Azure CLI authentication (run 'az login' first)
# No API key needed - just leave AZURE_OPENAI_API_KEY unset
# Required: Azure OpenAI endpoint with Responses API support
AZURE_OPENAI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
# Required: Deployment name (must support Responses API)
AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME=gpt-4.1-mini
@@ -0,0 +1,6 @@
# Copyright (c) Microsoft. All rights reserved.
"""Azure Responses Agent sample for DevUI."""
# Re-export the ``agent`` object defined in the sibling ``agent`` module.
from .agent import agent
# ``agent`` is the only name this package declares as public.
__all__ = ["agent"]
@@ -0,0 +1,123 @@
# Copyright (c) Microsoft. All rights reserved.
"""Sample agent using Azure OpenAI Responses API for Agent Framework DevUI.
This agent uses the Responses API which supports:
- PDF file uploads
- Image uploads
- Audio inputs
- And other multimodal content
The Chat Completions API (AzureOpenAIChatClient) does NOT support PDF uploads.
Use this agent when you need to process documents or other file types.
Required environment variables:
- AZURE_OPENAI_ENDPOINT: Your Azure OpenAI endpoint
(AZURE_OPENAI_RESPONSES_ENDPOINT takes precedence if set)
- AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME: Deployment name for Responses API
(falls back to AZURE_OPENAI_CHAT_DEPLOYMENT_NAME if not set)
- AZURE_OPENAI_API_KEY: Your API key (or use Azure CLI auth)
"""
import logging
import os
from typing import Annotated
from agent_framework import ChatAgent, ai_function
from agent_framework.azure import AzureOpenAIResponsesClient
logger = logging.getLogger(__name__)
# Resolve the deployment name: prefer the Responses-specific variable, then the
# chat deployment variable, and finally an empty string.
# An ``or`` chain is used instead of ``os.environ.get(primary, fallback)`` so
# that a variable which is set but blank (e.g. ``NAME=`` in a .env file) is
# treated like an unset one and does not block the fallback.
_deployment_name = (
    os.environ.get("AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME")
    or os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
    or ""
)

# Resolve the endpoint the same way: the Responses-specific variable wins,
# otherwise the general Azure OpenAI endpoint, otherwise an empty string.
_endpoint = (
    os.environ.get("AZURE_OPENAI_RESPONSES_ENDPOINT")
    or os.environ.get("AZURE_OPENAI_ENDPOINT")
    or ""
)
def analyze_content(
    query: Annotated[str, "What to analyze or extract from the uploaded content"],
) -> str:
    """Analyze uploaded content based on the user's query.

    Placeholder only: no analysis happens here. Per the sample's design, the
    model does the real work when it processes the uploaded files; this
    function just echoes the request back.

    Args:
        query: What to analyze or extract from the uploaded content.

    Returns:
        A status message that embeds ``query``.
    """
    status_template = "Analyzing content for: {}"
    return status_template.format(query)
@ai_function
def summarize_document(
    length: Annotated[str, "Desired summary length: 'brief', 'medium', or 'detailed'"] = "medium",
) -> str:
    """Generate a summary of the uploaded document."""
    # NOTE(review): the docstring is kept verbatim because the ai_function
    # decorator may use it as the tool description -- confirm before editing.
    # Placeholder tool: it only returns a status line naming the requested
    # length; no summarization is performed in this function.
    status_template = "Generating {} summary of the document..."
    return status_template.format(length)
@ai_function
def extract_key_points(
    max_points: Annotated[int, "Maximum number of key points to extract"] = 5,
) -> str:
    """Extract key points from the uploaded document."""
    # NOTE(review): the docstring is kept verbatim because the ai_function
    # decorator may use it as the tool description -- confirm before editing.
    # Placeholder tool: it only returns a status line carrying the requested
    # limit; no extraction is performed in this function.
    status_template = "Extracting up to {} key points..."
    return status_template.format(max_points)
# System prompt handed to the agent; describes its document-analysis role.
_AGENT_INSTRUCTIONS = """
You are a helpful document analysis assistant. You can:
1. Analyze uploaded PDF documents and extract information
2. Summarize document contents
3. Answer questions about uploaded files
4. Extract key points and insights
When a user uploads a file, carefully analyze its contents and provide
helpful, accurate information based on what you find.
For PDFs, you can read and understand the text, tables, and structure.
For images, you can describe what you see and extract any text.
"""

# Chat client backed by the Azure OpenAI Responses API, built from the
# deployment name and endpoint resolved from the environment above.
_responses_client = AzureOpenAIResponsesClient(
    api_version="2025-03-01-preview",  # Required for Responses API
    endpoint=_endpoint,
    deployment_name=_deployment_name,
)

# Agent using Azure OpenAI Responses API (supports PDF uploads!).
# Only the two decorated tools are registered with the agent.
agent = ChatAgent(
    name="AzureResponsesAgent",
    description="An agent that can analyze PDFs, images, and other documents using Azure OpenAI Responses API",
    instructions=_AGENT_INSTRUCTIONS,
    chat_client=_responses_client,
    tools=[summarize_document, extract_key_points],
)
def main():
    """Start DevUI serving the Azure Responses agent.

    Configures message-only INFO logging, prints a short startup banner that
    lists the supported upload types and the required environment variables,
    then serves ``agent`` on port 8090 with ``auto_open=True``.
    """
    # Imported inside the function so that importing this module does not
    # itself import agent_framework_devui.
    from agent_framework_devui import serve

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    rule = "=" * 60
    banner_lines = (
        rule,
        "Starting Azure Responses Agent",
        rule,
        "",
        "This agent uses the Azure OpenAI Responses API which supports:",
        " - PDF file uploads",
        " - Image uploads",
        " - Audio inputs",
        "",
        "Try uploading a PDF and asking questions about it!",
        "",
        "Required environment variables:",
        " - AZURE_OPENAI_ENDPOINT",
        " - AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME",
        " - AZURE_OPENAI_API_KEY (or use Azure CLI auth)",
        "",
    )
    # One logger.info call per banner line, in order.
    for banner_line in banner_lines:
        logger.info(banner_line)

    serve(entities=[agent], port=8090, auto_open=True)


if __name__ == "__main__":
    main()