mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
f5419b9f38
* Python: bump package versions for 1.2.2 release PATCH bump (1.2.1 -> 1.2.2) for the released cohort. Five PRs land in this window: - agent-framework-openai: fix file_search citations breaking the assistant- message history roundtrip (#5557) — drives the released-tier PATCH - agent-framework-orchestrations: [BREAKING] standardize orchestration terminal outputs as AgentResponse (#5301) - agent-framework-core, agent-framework-declarative: preserve Workflow.run() shared state across calls, accept list[Message] in declarative start executor, and coerce Enum values when serializing PowerFx symbols (#5531) - agent-framework-foundry-hosting: add hosted Durable Workflow support (#5531) - agent-framework-azure-contentunderstanding: new alpha package — Azure AI Content Understanding context provider (#4829) - dependencies: workspace package dependency refresh (#5555) Per lockstep convention, all 21 beta packages stamp 1.0.0b260429 and all 4 alpha packages (now including the new contentunderstanding) stamp 1.0.0a260429. Date stamp reflects 2026-04-29 Pacific. Every non-core package floor on agent-framework-core is raised to >=1.2.2; the new contentunderstanding package's stale >=1.0.0 floor is brought into line. Two follow-on fixes bundled to keep validate-dependency-bounds-test green at lowest-direct resolution: - Bump agent-framework-azure-contentunderstanding's azure-ai-content understanding lower bound from >=1.0.0 to >=1.0.1 (1.0.0 ships without proper typing — pyright reports 65 unknown-type errors) - Add pyright ignore comments to core/foundry/__init__.pyi for the new alpha package's type-stub imports, since alpha packages are not in core's [all] extra and therefore aren't installed at lowest-direct * Python: add #5552 to 1.2.2 CHANGELOG Add the streaming-span observability fix to the Fixed section. PR is on upstream/main but not yet pulled into origin/main; the code itself will land via the PR merge. 
* Python: address PR #5561 review feedback on dependency bounds Two packaging fixes flagged in review: 1. agent-framework-azure-contentunderstanding: add agent-framework-foundry as a runtime dependency. The package's README directs users to `pip install agent-framework-azure-contentunderstanding --pre` and the basic example imports `FoundryChatClient` from `agent_framework.foundry`, so the documented install path was failing with ImportError. Pulling agent-framework-foundry into deps makes the advertised entry path self-contained. 2. agent-framework-foundry: bump agent-framework-openai lower bound from >=1.1.0 to >=1.2.2,<2. Foundry imports private modules from agent_framework_openai (`_chat_client.py:22`, `_agent.py:34`), so resolvers were free to pair foundry==1.2.2 with older OpenAI versions that lack this release's coordinated Responses/history fix. Lockstep the floor with the released cohort to prevent mismatched installs. Both changes pass `validate-dependency-bounds-test` lower + upper at their respective packages.
194 lines
7.0 KiB
Python
194 lines
7.0 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
# /// script
|
|
# requires-python = ">=3.10"
|
|
# dependencies = [
|
|
# "agent-framework-azure-contentunderstanding",
|
|
# "agent-framework-foundry",
|
|
# "azure-identity",
|
|
# "pydantic",
|
|
# ]
|
|
# ///
|
|
# Run with: uv run packages/azure-contentunderstanding/samples/01-get-started/04_invoice_processing.py
|
|
|
|
|
|
import asyncio
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from agent_framework import Agent, AgentSession, Content, Message
|
|
from agent_framework.foundry import ContentUnderstandingContextProvider, FoundryChatClient
|
|
from azure.identity import AzureCliCredential
|
|
from dotenv import load_dotenv
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Load variables from a local .env file (when one is found) at import time,
# so the os.environ lookups in main() can see them.
load_dotenv()
|
|
|
|
"""
|
|
Invoice Processing — Structured output with prebuilt-invoice analyzer
|
|
|
|
This sample demonstrates CU's structured field extraction combined with
|
|
LLM structured output (Pydantic model). The prebuilt-invoice analyzer extracts
|
|
typed fields (VendorName, InvoiceTotal, DueDate, LineItems, etc.) with
|
|
confidence scores. We use output_sections=["fields"] only (no markdown needed)
|
|
since we want the LLM to produce a structured JSON response from the extracted
|
|
fields, not summarize document text.
|
|
|
|
Environment variables:
|
|
FOUNDRY_PROJECT_ENDPOINT — Azure AI Foundry project endpoint
|
|
FOUNDRY_MODEL — Model deployment name (e.g. gpt-4.1)
|
|
AZURE_CONTENTUNDERSTANDING_ENDPOINT — CU endpoint URL
|
|
"""
|
|
|
|
# Absolute path to the bundled sample invoice: two directory levels above this
# file's own location, then shared/sample_assets/invoice.pdf.
SAMPLE_PDF_PATH = Path(__file__).resolve().parents[1].joinpath("shared", "sample_assets", "invoice.pdf")
|
|
|
|
|
|
# Structured output models — the LLM returns JSON matching this schema.
|
|
#
|
|
# Note: the prebuilt-invoice analyzer extracts an extensive set of fields
|
|
# (VendorName, BillingAddress, ShippingAddress, TaxDetails, PONumber, etc.).
|
|
# This sample defines a simplified schema to extract only the fields of
|
|
# interest to the caller. The LLM maps the full CU field output to this
|
|
# subset automatically.
|
|
# Learn more about prebuilt analyzers: https://learn.microsoft.com/azure/ai-services/content-understanding/concepts/prebuilt-analyzers
|
|
|
|
|
|
# One invoice line entry. Only the description is required; every numeric
# detail defaults to None when it is not supplied.
class LineItem(BaseModel):
    description: str
    quantity: float | None = None
    unit_price: float | None = None
    amount: float | None = None
|
|
|
|
|
|
# Pairs a field name with its confidence score; used as the element type of
# InvoiceResult.low_confidence_fields.
class LowConfidenceField(BaseModel):
    field_name: str
    confidence: float
|
|
|
|
|
|
# Top-level structured result requested from the agent via response_format.
# vendor_name is the only required field; the rest have defaults.
class InvoiceResult(BaseModel):
    vendor_name: str
    total_amount: float | None = None
    # Falls back to "USD" when no currency is supplied.
    currency: str = "USD"
    # Kept as a plain string; no date parsing is done in this sample.
    due_date: str | None = None
    line_items: list[LineItem] = Field(default_factory=list)
    low_confidence_fields: list[LowConfidenceField] = Field(
        description="Fields with confidence < 0.8, including their confidence score",
        default_factory=list,
    )
|
|
|
|
|
|
async def main() -> None:
    """Run the invoice-processing sample end to end.

    Builds a Content Understanding context provider and a Foundry chat client,
    sends the sample invoice PDF to an agent with ``InvoiceResult`` as the
    requested response format, prints the parsed result, and then asks a
    free-text follow-up question in the same session.

    Raises:
        KeyError: If ``AZURE_CONTENTUNDERSTANDING_ENDPOINT``,
            ``FOUNDRY_PROJECT_ENDPOINT`` or ``FOUNDRY_MODEL`` is not set.
        OSError: If the sample PDF at ``SAMPLE_PDF_PATH`` cannot be read.
    """
    # Local import: only this function needs the exception type.
    from pydantic import ValidationError

    # 1. Set up credentials and CU context provider
    credential = AzureCliCredential()

    # Default analyzer is prebuilt-documentSearch (RAG-optimized).
    # Per-file override via additional_properties["analyzer_id"] lets us
    # use prebuilt-invoice for structured field extraction on specific files.
    #
    # Only request "fields" (not "markdown") — we want the extracted typed
    # fields for structured output, not the raw document text.
    cu = ContentUnderstandingContextProvider(
        endpoint=os.environ["AZURE_CONTENTUNDERSTANDING_ENDPOINT"],
        credential=credential,
        analyzer_id="prebuilt-documentSearch",  # default for all files
        max_wait=None,  # wait until CU analysis finishes
        output_sections=["fields"],  # fields only — structured output doesn't need markdown
    )

    # 2. Set up the LLM client
    client = FoundryChatClient(
        project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"],
        model=os.environ["FOUNDRY_MODEL"],
        credential=credential,
    )

    # 3. Create agent and session
    async with cu:
        agent = Agent(
            client=client,
            name="InvoiceProcessor",
            instructions=(
                "You are an invoice processing assistant. Extract invoice data from "
                "the provided CU fields (JSON with confidence scores). Return structured "
                "output matching the requested schema. Flag fields with confidence < 0.8 "
                "in the low_confidence_fields list."
            ),
            context_providers=[cu],
        )

        session = AgentSession()

        # 4. Upload an invoice PDF — uses structured output (Pydantic model)
        print("--- Upload Invoice (Structured Output) ---")

        pdf_bytes = SAMPLE_PDF_PATH.read_bytes()

        response = await agent.run(
            Message(
                role="user",
                contents=[
                    Content.from_text(
                        "Process this invoice. Extract the vendor name, total amount, due date, and all line items."
                    ),
                    Content.from_data(
                        pdf_bytes,
                        "application/pdf",
                        # Per-file analyzer override: use prebuilt-invoice for
                        # structured field extraction (VendorName, InvoiceTotal, etc.)
                        # instead of the provider default (prebuilt-documentSearch).
                        additional_properties={
                            "filename": SAMPLE_PDF_PATH.name,
                            "analyzer_id": "prebuilt-invoice",
                        },
                    ),
                ],
            ),
            session=session,
            options={"response_format": InvoiceResult},
        )

        # Parse the structured output from JSON text. Only the validation call
        # is guarded: a reply that is not valid JSON or does not match the
        # schema falls back to the raw text, while any other error propagates
        # instead of being mistaken for a parse failure.
        try:
            invoice = InvoiceResult.model_validate_json(response.text)
        except ValidationError as exc:
            print(f"Structured output did not validate ({exc.error_count()} error(s)); showing raw reply.")
            print(f"Agent (raw): {response.text}\n")
        else:
            print(f"Vendor: {invoice.vendor_name}")
            print(f"Total: {invoice.currency} {invoice.total_amount}")
            print(f"Due date: {invoice.due_date}")
            print(f"Line items ({len(invoice.line_items)}):")
            for item in invoice.line_items:
                print(f" - {item.description}: {item.amount}")
            if invoice.low_confidence_fields:
                print("⚠ Low confidence fields:")
                for flagged in invoice.low_confidence_fields:
                    print(f" - {flagged.field_name}: {flagged.confidence:.3f}")

        # 5. Follow-up: free-text question about the invoice
        print("\n--- Follow-up (Free Text) ---")
        response = await agent.run(
            "What is the payment term? Are there any fields with low confidence?",
            session=session,
        )
        print(f"Agent: {response}\n")
|
|
|
|
|
|
# Script entry point: drive the async main() to completion when this file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
|
|
|
|
"""
|
|
Sample output:
|
|
|
|
--- Upload Invoice (Structured Output) ---
|
|
Vendor: CONTOSO LTD.
|
|
Total: USD 110.0
|
|
Due date: 2019-12-15
|
|
Line items (3):
|
|
- Consulting Services: 60.0
|
|
- Document Fee: 30.0
|
|
- Printing Fee: 10.0
|
|
⚠ Low confidence fields:
 - VendorName: <confidence>
 - CustomerName: <confidence>
|
|
|
|
--- Follow-up (Free Text) ---
|
|
Agent: The payment terms are not explicitly stated on the invoice...
|
|
"""
|