agent-framework/python/packages/azure-contentunderstanding/samples/01-get-started/01_document_qa.py

# Copyright (c) Microsoft. All rights reserved.
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "agent-framework-azure-contentunderstanding",
#     "agent-framework-foundry",
#     "azure-identity",
# ]
# ///
# Run with: uv run packages/azure-contentunderstanding/samples/01-get-started/01_document_qa.py


import asyncio
import os
from pathlib import Path

from agent_framework import Agent, Content, Message
from agent_framework.foundry import ContentUnderstandingContextProvider, FoundryChatClient
from azure.identity import AzureCliCredential
from dotenv import load_dotenv

load_dotenv()

"""
Document Q&A — PDF upload with CU-powered extraction

This sample demonstrates the simplest CU integration: upload a PDF and
ask questions about it. Azure Content Understanding extracts structured
markdown with table preservation — superior to LLM-only vision for
scanned PDFs, handwritten content, and complex layouts.

Environment variables:
  FOUNDRY_PROJECT_ENDPOINT                — Azure AI Foundry project endpoint
  FOUNDRY_MODEL             — Model deployment name (e.g. gpt-4.1)
  AZURE_CONTENTUNDERSTANDING_ENDPOINT      — CU endpoint URL
"""

# Path to a sample PDF — uses the shared sample asset if available,
# otherwise falls back to a public URL
SAMPLE_PDF_PATH = Path(__file__).resolve().parents[1] / "shared" / "sample_assets" / "invoice.pdf"


async def main() -> None:
    credential = AzureCliCredential()

    # Set up Azure Content Understanding context provider
    cu = ContentUnderstandingContextProvider(
        endpoint=os.environ["AZURE_CONTENTUNDERSTANDING_ENDPOINT"],
        credential=credential,
        analyzer_id="prebuilt-documentSearch",  # RAG-optimized document analyzer
        max_wait=None,  # wait until CU analysis finishes (no background deferral)
    )

    # Set up the LLM client
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=credential,
    )

    # Create agent with CU context provider.
    # The provider extracts document content via CU and injects it into the
    # LLM context so the agent can answer questions about the document.
    async with cu:
        agent = Agent(
            client=client,
            name="DocumentQA",
            instructions=(
                "You are a helpful document analyst. Use the analyzed document "
                "content and extracted fields to answer questions precisely."
            ),
            context_providers=[cu],
        )

        # --- Turn 1: Upload PDF and ask a question ---
        # 4. Upload PDF and ask questions
        # The CU provider extracts markdown + fields from the PDF and injects
        # the full content into context so the agent can answer precisely.
        print("--- Upload PDF and ask questions ---")

        pdf_bytes = SAMPLE_PDF_PATH.read_bytes()

        response = await agent.run(
            Message(
                role="user",
                contents=[
                    Content.from_text(
                        "What is this document about? Who is the vendor, and what is the total amount due?"
                    ),
                    Content.from_data(
                        pdf_bytes,
                        "application/pdf",
                        # Always provide filename — used as the document key
                        additional_properties={"filename": SAMPLE_PDF_PATH.name},
                    ),
                ],
            )
        )
        usage = response.usage_details or {}
        print(f"Agent: {response}")
        print(f"  [Input tokens: {usage.get('input_token_count', 'N/A')}]\n")


if __name__ == "__main__":
    asyncio.run(main())

"""
Sample output:

--- Upload PDF and ask questions ---
Agent: This document is an **invoice** for services and fees billed to
  **MICROSOFT CORPORATION** (Invoice **INV-100**), including line items
  (e.g., Consulting Services, Document Fee, Printing Fee) and a billing summary.
  - **Vendor:** **CONTOSO LTD.**
  - **Total amount due:** **$610.00**
  [Input tokens: 988]
"""