mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Add PDF file support to OpenAI content parser with filename mapping (#1121)
* Add mapping for application media type in OpenAI responses client * Enhance multimodal input samples: Add PDF testing functionality and fix image sample * Standardize filename handling and add multimodal samples - Standardized filename extraction logic between chat and responses clients - Both clients now omit filename when not provided (no default fallback) - Added Azure Responses API multimodal sample with PDF support - Cleaned up Azure Chat sample to focus on supported features only - Fixed test comment placement for better code documentation - Updated README with clear API capability differences * Enhance multimodal input samples with image and PDF handling - Refactor image and PDF handling in `azure_chat_multimodal.py` and `openai_chat_multimodal.py` to use new utility functions. - Add `load_sample_pdf` and `create_sample_image` functions for better test asset management. - Remove redundant code for creating sample images and PDFs. - Introduce a sample PDF file in `sample_assets` for testing purposes. * Fix formatting in OpenAI chat client --------- Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
1d27b57672
commit
8bb1266020
@@ -419,27 +419,24 @@ class OpenAIBaseChatClient(OpenAIBase, BaseChatClient):
|
||||
"format": audio_format,
|
||||
},
|
||||
}
|
||||
case DataContent() | UriContent() if content.media_type and content.media_type.startswith("application/"):
|
||||
if content.media_type == "application/pdf":
|
||||
if content.uri.startswith("data:"):
|
||||
filename = (
|
||||
getattr(content, "filename", None)
|
||||
or content.additional_properties.get("filename", "document.pdf")
|
||||
if hasattr(content, "additional_properties") and content.additional_properties
|
||||
else "document.pdf"
|
||||
)
|
||||
return {
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": content.uri, # Send full data URI
|
||||
"filename": filename,
|
||||
},
|
||||
}
|
||||
|
||||
return content.to_dict(exclude_none=True)
|
||||
|
||||
return content.to_dict(exclude_none=True)
|
||||
case DataContent() | UriContent() if content.has_top_level_media_type(
|
||||
"application"
|
||||
) and content.uri.startswith("data:"):
|
||||
# All application/* media types should be treated as files for OpenAI
|
||||
filename = getattr(content, "filename", None) or (
|
||||
content.additional_properties.get("filename")
|
||||
if hasattr(content, "additional_properties") and content.additional_properties
|
||||
else None
|
||||
)
|
||||
file_obj = {"file_data": content.uri}
|
||||
if filename:
|
||||
file_obj["filename"] = filename
|
||||
return {
|
||||
"type": "file",
|
||||
"file": file_obj,
|
||||
}
|
||||
case _:
|
||||
# Default fallback for all other content types
|
||||
return content.to_dict(exclude_none=True)
|
||||
|
||||
@override
|
||||
|
||||
@@ -454,6 +454,19 @@ class OpenAIBaseResponsesClient(OpenAIBase, BaseChatClient):
|
||||
"format": format,
|
||||
},
|
||||
}
|
||||
if content.has_top_level_media_type("application"):
|
||||
filename = getattr(content, "filename", None) or (
|
||||
content.additional_properties.get("filename")
|
||||
if hasattr(content, "additional_properties") and content.additional_properties
|
||||
else None
|
||||
)
|
||||
file_obj = {
|
||||
"type": "input_file",
|
||||
"file_data": content.uri,
|
||||
}
|
||||
if filename:
|
||||
file_obj["filename"] = filename
|
||||
return file_obj
|
||||
return {}
|
||||
case FunctionCallContent():
|
||||
return {
|
||||
|
||||
@@ -756,7 +756,12 @@ def test_openai_content_parser_data_content_image(openai_unit_test_env: dict[str
|
||||
assert result["input_audio"]["data"] == "//uQAAAAWGluZwAAAA8AAAACAAACcQ=="
|
||||
assert result["input_audio"]["format"] == "mp3"
|
||||
|
||||
# Test DataContent with PDF file
|
||||
|
||||
def test_openai_content_parser_document_file_mapping(openai_unit_test_env: dict[str, str]) -> None:
|
||||
"""Test _openai_content_parser converts document files (PDF, DOCX, etc.) to OpenAI file format."""
|
||||
client = OpenAIChatClient()
|
||||
|
||||
# Test PDF without filename - should omit filename in OpenAI payload
|
||||
pdf_data_content = DataContent(
|
||||
uri="data:application/pdf;base64,JVBERi0xLjQKJcfsj6IKNSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0tpZHNbMyAwIFJdL0NvdW50IDE+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXS9QYXJlbnQgMiAwIFIvUmVzb3VyY2VzPDwvRm9udDw8L0YxIDQgMCBSPj4+Pi9Db250ZW50cyA1IDAgUj4+CmVuZG9iago0IDAgb2JqCjw8L1R5cGUvRm9udC9TdWJ0eXBlL1R5cGUxL0Jhc2VGb250L0hlbHZldGljYT4+CmVuZG9iago1IDAgb2JqCjw8L0xlbmd0aCA0ND4+CnN0cmVhbQpCVApxCjcwIDUwIFRECi9GMSA4IFRmCihIZWxsbyBXb3JsZCEpIFRqCkVUCmVuZHN0cmVhbQplbmRvYmoKeHJlZgowIDYKMDAwMDAwMDAwMCA2NTUzNSBmIAowMDAwMDAwMDA5IDAwMDAwIG4gCjAwMDAwMDAwNTggMDAwMDAgbiAKMDAwMDAwMDExNSAwMDAwMCBuIAowMDAwMDAwMjQ1IDAwMDAwIG4gCjAwMDAwMDAzMDcgMDAwMDAgbiAKdHJhaWxlcgo8PC9TaXplIDYvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgo0MDUKJSVFT0Y=",
|
||||
media_type="application/pdf",
|
||||
@@ -764,14 +769,15 @@ def test_openai_content_parser_data_content_image(openai_unit_test_env: dict[str
|
||||
|
||||
result = client._openai_content_parser(pdf_data_content) # type: ignore
|
||||
|
||||
# Should convert to OpenAI file format
|
||||
# Should convert to OpenAI file format without filename
|
||||
assert result["type"] == "file"
|
||||
assert result["file"]["filename"] == "document.pdf"
|
||||
assert "filename" not in result["file"] # No filename provided, so none should be set
|
||||
assert "file_data" in result["file"]
|
||||
# Base64 data should be the full data URI (OpenAI requirement)
|
||||
assert result["file"]["file_data"].startswith("data:application/pdf;base64,")
|
||||
assert result["file"]["file_data"] == pdf_data_content.uri
|
||||
|
||||
# Test DataContent with PDF and custom filename
|
||||
# Test PDF with custom filename via additional_properties
|
||||
pdf_with_filename = DataContent(
|
||||
uri="data:application/pdf;base64,JVBERi0xLjQ=",
|
||||
media_type="application/pdf",
|
||||
@@ -783,17 +789,75 @@ def test_openai_content_parser_data_content_image(openai_unit_test_env: dict[str
|
||||
# Should use custom filename
|
||||
assert result["type"] == "file"
|
||||
assert result["file"]["filename"] == "report.pdf"
|
||||
assert result["file"]["file_data"] == "data:application/pdf;base64,JVBERi0xLjQ="
|
||||
|
||||
# Test different application/* media types - all should now be mapped to file format
|
||||
test_cases = [
|
||||
{
|
||||
"media_type": "application/json",
|
||||
"filename": "data.json",
|
||||
"base64": "eyJrZXkiOiJ2YWx1ZSJ9",
|
||||
},
|
||||
{
|
||||
"media_type": "application/xml",
|
||||
"filename": "config.xml",
|
||||
"base64": "PD94bWwgdmVyc2lvbj0iMS4wIj8+",
|
||||
},
|
||||
{
|
||||
"media_type": "application/octet-stream",
|
||||
"filename": "binary.bin",
|
||||
"base64": "AQIDBAUGBwgJCg==",
|
||||
},
|
||||
]
|
||||
|
||||
def test_openai_chat_client_with_callable_api_key() -> None:
|
||||
"""Test OpenAIChatClient initialization with callable API key."""
|
||||
for case in test_cases:
|
||||
# Test without filename
|
||||
doc_content = DataContent(
|
||||
uri=f"data:{case['media_type']};base64,{case['base64']}",
|
||||
media_type=case["media_type"],
|
||||
)
|
||||
|
||||
async def get_api_key() -> str:
|
||||
return "test-api-key-123"
|
||||
result = client._openai_content_parser(doc_content) # type: ignore
|
||||
|
||||
client = OpenAIChatClient(model_id="gpt-4o", api_key=get_api_key)
|
||||
# All application/* types should now be mapped to file format
|
||||
assert result["type"] == "file"
|
||||
assert "filename" not in result["file"] # Should omit filename when not provided
|
||||
assert result["file"]["file_data"] == doc_content.uri
|
||||
|
||||
# Verify client was created successfully
|
||||
assert client.model_id == "gpt-4o"
|
||||
# OpenAI SDK now manages callable API keys internally
|
||||
assert client.client is not None
|
||||
# Test with filename - should now use file format with filename
|
||||
doc_with_filename = DataContent(
|
||||
uri=f"data:{case['media_type']};base64,{case['base64']}",
|
||||
media_type=case["media_type"],
|
||||
additional_properties={"filename": case["filename"]},
|
||||
)
|
||||
|
||||
result = client._openai_content_parser(doc_with_filename) # type: ignore
|
||||
|
||||
# Should now use file format with filename
|
||||
assert result["type"] == "file"
|
||||
assert result["file"]["filename"] == case["filename"]
|
||||
assert result["file"]["file_data"] == doc_with_filename.uri
|
||||
|
||||
# Test edge case: empty additional_properties dict
|
||||
pdf_empty_props = DataContent(
|
||||
uri="data:application/pdf;base64,JVBERi0xLjQ=",
|
||||
media_type="application/pdf",
|
||||
additional_properties={},
|
||||
)
|
||||
|
||||
result = client._openai_content_parser(pdf_empty_props) # type: ignore
|
||||
|
||||
assert result["type"] == "file"
|
||||
assert "filename" not in result["file"]
|
||||
|
||||
# Test edge case: None filename in additional_properties
|
||||
pdf_none_filename = DataContent(
|
||||
uri="data:application/pdf;base64,JVBERi0xLjQ=",
|
||||
media_type="application/pdf",
|
||||
additional_properties={"filename": None},
|
||||
)
|
||||
|
||||
result = client._openai_content_parser(pdf_none_filename) # type: ignore
|
||||
|
||||
assert result["type"] == "file"
|
||||
assert "filename" not in result["file"] # None filename should be omitted
|
||||
|
||||
@@ -10,11 +10,17 @@ This folder contains examples demonstrating how to send multimodal content (imag
|
||||
- **Description**: Shows how to send images, audio, and PDF files to OpenAI's Chat Completions API
|
||||
- **Supported formats**: PNG/JPEG images, WAV/MP3 audio, PDF documents
|
||||
|
||||
### Azure Chat Client
|
||||
### Azure OpenAI Chat Client
|
||||
|
||||
- **File**: `azure_chat_multimodal.py`
|
||||
- **Description**: Shows how to send multimodal content to Azure OpenAI service
|
||||
- **Supported formats**: PNG/JPEG images, WAV/MP3 audio, PDF documents
|
||||
- **Description**: Shows how to send images to Azure OpenAI Chat Completions API
|
||||
- **Supported formats**: PNG/JPEG images (PDF files are NOT supported by Chat Completions API)
|
||||
|
||||
### Azure OpenAI Responses Client
|
||||
|
||||
- **File**: `azure_responses_multimodal.py`
|
||||
- **Description**: Shows how to send images and PDF files to Azure OpenAI Responses API
|
||||
- **Supported formats**: PNG/JPEG images, PDF documents (full multimodal support)
|
||||
|
||||
## Environment Variables
|
||||
|
||||
@@ -24,8 +30,10 @@ Set the following environment variables before running the examples:
|
||||
- `OPENAI_API_KEY`: Your OpenAI API key
|
||||
|
||||
**For Azure OpenAI:**
|
||||
|
||||
- `AZURE_OPENAI_ENDPOINT`: Your Azure OpenAI endpoint
|
||||
- `AZURE_OPENAI_CHAT_DEPLOYMENT_NAME`: The name of your Azure OpenAI chat model deployment
|
||||
- `AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME`: The name of your Azure OpenAI responses model deployment
|
||||
|
||||
Optionally for Azure OpenAI:
|
||||
- `AZURE_OPENAI_API_VERSION`: The API version to use (default is `2024-10-21`)
|
||||
@@ -51,8 +59,11 @@ The Azure example uses `AzureCliCredential` for authentication. Run `az login` i
|
||||
# Run OpenAI example
|
||||
python openai_chat_multimodal.py
|
||||
|
||||
# Run Azure example (requires az login or API key)
|
||||
# Run Azure Chat example (requires az login or API key)
|
||||
python azure_chat_multimodal.py
|
||||
|
||||
# Run Azure Responses example (requires az login or API key)
|
||||
python azure_responses_multimodal.py
|
||||
```
|
||||
|
||||
## Using Your Own Files
|
||||
@@ -101,8 +112,8 @@ DataContent(
|
||||
|
||||
## API Differences
|
||||
|
||||
- **Chat Completions API**: Supports images, audio, and PDF files
|
||||
- **Assistants API**: Only supports text and images (no audio/PDF)
|
||||
- **Responses API**: Similar to Chat Completions
|
||||
- **OpenAI Chat Completions API**: Supports images, audio, and PDF files
|
||||
- **Azure OpenAI Chat Completions API**: Supports images only (no PDF/audio file types)
|
||||
- **Azure OpenAI Responses API**: Supports images and PDF files (full multimodal support)
|
||||
|
||||
Choose the appropriate client based on your multimodal needs.
|
||||
Choose the appropriate client based on your multimodal needs and available APIs.
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
|
||||
import requests
|
||||
from agent_framework import ChatMessage, DataContent, Role, TextContent
|
||||
from agent_framework.azure import AzureOpenAIChatClient
|
||||
from azure.identity import AzureCliCredential
|
||||
|
||||
|
||||
def create_sample_image() -> str:
    """Return a minimal 1x1 RGBA PNG encoded as a base64 ``data:`` URI.

    The image bytes are embedded directly in the source, so the value can be
    passed straight to ``DataContent(uri=..., media_type="image/png")``.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    # The single pixel decodes to RGBA (255, 255, 0, 127): semi-transparent
    # yellow (not red, as an earlier comment claimed).
    png_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    return f"data:image/png;base64,{png_data}"
|
||||
|
||||
async def test_image() -> None:
|
||||
"""Test image analysis with Azure."""
|
||||
"""Test image analysis with Azure OpenAI."""
|
||||
# For authentication, run `az login` command in terminal or replace AzureCliCredential with preferred
|
||||
# authentication option. Requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
|
||||
# environment variables to be set.
|
||||
@@ -18,15 +22,10 @@ async def test_image() -> None:
|
||||
# client = AzureOpenAIChatClient(credential=AzureCliCredential(), deployment_name="your-deployment-name")
|
||||
client = AzureOpenAIChatClient(credential=AzureCliCredential())
|
||||
|
||||
# Fetch image from httpbin
|
||||
image_url = "https://httpbin.org/image/jpeg"
|
||||
response = requests.get(image_url)
|
||||
image_b64 = base64.b64encode(response.content).decode()
|
||||
image_uri = f"data:image/jpeg;base64,{image_b64}"
|
||||
|
||||
image_uri = create_sample_image()
|
||||
message = ChatMessage(
|
||||
role=Role.USER,
|
||||
contents=[TextContent(text="What's in this image?"), DataContent(uri=image_uri, media_type="image/jpeg")],
|
||||
contents=[TextContent(text="What's in this image?"), DataContent(uri=image_uri, media_type="image/png")],
|
||||
)
|
||||
|
||||
response = await client.get_response(message)
|
||||
@@ -34,9 +33,9 @@ async def test_image() -> None:
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
print("=== Testing Azure Multimodal ===")
|
||||
print("=== Testing Azure OpenAI Multimodal ===")
|
||||
print("Testing image analysis (supported by Chat Completions API)")
|
||||
await test_image()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from agent_framework import ChatMessage, DataContent, Role, TextContent
|
||||
from agent_framework.azure import AzureOpenAIResponsesClient
|
||||
from azure.identity import AzureCliCredential
|
||||
|
||||
ASSETS_DIR = Path(__file__).resolve().parent.parent / "sample_assets"
|
||||
|
||||
|
||||
def load_sample_pdf() -> bytes:
    """Return the raw bytes of ``sample.pdf`` located under ``ASSETS_DIR``."""
    return (ASSETS_DIR / "sample.pdf").read_bytes()
|
||||
|
||||
|
||||
def create_sample_image() -> str:
    """Return a minimal 1x1 RGBA PNG encoded as a base64 ``data:`` URI.

    The image bytes are embedded directly in the source, so the value can be
    passed straight to ``DataContent(uri=..., media_type="image/png")``.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    # The single pixel decodes to RGBA (255, 255, 0, 127): semi-transparent
    # yellow (not red, as an earlier comment claimed).
    png_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    return f"data:image/png;base64,{png_data}"
|
||||
|
||||
|
||||
async def test_image() -> None:
    """Send the embedded sample PNG to the Azure OpenAI Responses client and print the reply."""
    # For authentication, run `az login` command in terminal or replace AzureCliCredential with preferred
    # authentication option. Requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME
    # environment variables to be set.
    # Alternatively, you can pass deployment_name explicitly:
    # client = AzureOpenAIResponsesClient(credential=AzureCliCredential(), deployment_name="your-deployment-name")
    responses_client = AzureOpenAIResponsesClient(credential=AzureCliCredential())

    png_uri = create_sample_image()
    # One user turn carrying both the question and the inline image.
    parts = [
        TextContent(text="What's in this image?"),
        DataContent(uri=png_uri, media_type="image/png"),
    ]
    user_message = ChatMessage(role=Role.USER, contents=parts)

    reply = await responses_client.get_response(user_message)
    print(f"Image Response: {reply}")
|
||||
|
||||
|
||||
async def test_pdf() -> None:
    """Send the bundled sample PDF to the Azure OpenAI Responses client and print the reply."""
    responses_client = AzureOpenAIResponsesClient(credential=AzureCliCredential())

    document_bytes = load_sample_pdf()
    # The filename travels in additional_properties alongside the PDF bytes.
    attachment = DataContent(
        data=document_bytes,
        media_type="application/pdf",
        additional_properties={"filename": "sample.pdf"},
    )
    question = TextContent(text="What information can you extract from this document?")
    user_message = ChatMessage(role=Role.USER, contents=[question, attachment])

    reply = await responses_client.get_response(user_message)
    print(f"PDF Response: {reply}")
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the image sample and then the PDF sample, one after the other."""
    print("=== Testing Azure OpenAI Responses API Multimodal ===")
    print("The Responses API supports both images AND PDFs")
    # Scenarios are awaited sequentially, in the listed order.
    for scenario in (test_image, test_pdf):
        await scenario()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -3,36 +3,29 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import struct
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from agent_framework import ChatMessage, DataContent, Role, TextContent
|
||||
from agent_framework.openai import OpenAIChatClient
|
||||
|
||||
|
||||
async def test_image() -> None:
|
||||
"""Test image analysis with OpenAI."""
|
||||
client = OpenAIChatClient(model_id="gpt-4o")
|
||||
|
||||
# Fetch image from httpbin
|
||||
image_url = "https://httpbin.org/image/jpeg"
|
||||
response = requests.get(image_url)
|
||||
image_b64 = base64.b64encode(response.content).decode()
|
||||
image_uri = f"data:image/jpeg;base64,{image_b64}"
|
||||
|
||||
message = ChatMessage(
|
||||
role=Role.USER,
|
||||
contents=[TextContent(text="What's in this image?"), DataContent(uri=image_uri, media_type="image/jpeg")],
|
||||
)
|
||||
|
||||
response = await client.get_response(message)
|
||||
print(f"Image Response: {response}")
|
||||
ASSETS_DIR = Path(__file__).resolve().parent.parent / "sample_assets"
|
||||
|
||||
|
||||
async def test_audio() -> None:
|
||||
"""Test audio analysis with OpenAI."""
|
||||
client = OpenAIChatClient(model_id="gpt-4o-audio-preview")
|
||||
def load_sample_pdf() -> bytes:
    """Return the raw bytes of ``sample.pdf`` located under ``ASSETS_DIR``."""
    return (ASSETS_DIR / "sample.pdf").read_bytes()
|
||||
|
||||
# Create minimal WAV file (0.1 seconds of silence)
|
||||
|
||||
def create_sample_image() -> str:
    """Return a minimal 1x1 RGBA PNG encoded as a base64 ``data:`` URI.

    The image bytes are embedded directly in the source, so the value can be
    passed straight to ``DataContent(uri=..., media_type="image/png")``.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    # The single pixel decodes to RGBA (255, 255, 0, 127): semi-transparent
    # yellow (not red, as an earlier comment claimed).
    png_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
    return f"data:image/png;base64,{png_data}"
|
||||
|
||||
|
||||
def create_sample_audio() -> str:
|
||||
"""Create a minimal WAV file for testing (0.1 seconds of silence)."""
|
||||
wav_header = (
|
||||
b"RIFF"
|
||||
+ struct.pack("<I", 44) # file size
|
||||
@@ -44,8 +37,28 @@ async def test_audio() -> None:
|
||||
+ b"\x00" * 1600 # 0.1 sec silence
|
||||
)
|
||||
audio_b64 = base64.b64encode(wav_header).decode()
|
||||
audio_uri = f"data:audio/wav;base64,{audio_b64}"
|
||||
return f"data:audio/wav;base64,{audio_b64}"
|
||||
|
||||
|
||||
async def test_image() -> None:
    """Send the embedded sample PNG to the OpenAI chat client and print the reply."""
    chat_client = OpenAIChatClient(model_id="gpt-4o")

    png_uri = create_sample_image()
    # One user turn carrying both the question and the inline image.
    parts = [
        TextContent(text="What's in this image?"),
        DataContent(uri=png_uri, media_type="image/png"),
    ]
    user_message = ChatMessage(role=Role.USER, contents=parts)

    reply = await chat_client.get_response(user_message)
    print(f"Image Response: {reply}")
|
||||
|
||||
|
||||
async def test_audio() -> None:
|
||||
"""Test audio analysis with OpenAI."""
|
||||
client = OpenAIChatClient(model_id="gpt-4o-audio-preview")
|
||||
|
||||
audio_uri = create_sample_audio()
|
||||
message = ChatMessage(
|
||||
role=Role.USER,
|
||||
contents=[
|
||||
@@ -58,10 +71,30 @@ async def test_audio() -> None:
|
||||
print(f"Audio Response: {response}")
|
||||
|
||||
|
||||
async def test_pdf() -> None:
    """Send the bundled sample PDF to the OpenAI chat client and print the reply."""
    chat_client = OpenAIChatClient(model_id="gpt-4o")

    document_bytes = load_sample_pdf()
    # The filename travels in additional_properties alongside the PDF bytes.
    attachment = DataContent(
        data=document_bytes,
        media_type="application/pdf",
        additional_properties={"filename": "employee_report.pdf"},
    )
    question = TextContent(text="What information can you extract from this document?")
    user_message = ChatMessage(role=Role.USER, contents=[question, attachment])

    reply = await chat_client.get_response(user_message)
    print(f"PDF Response: {reply}")
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the image, audio and PDF samples one after the other."""
    print("=== Testing OpenAI Multimodal ===")
    # Scenarios are awaited sequentially, in the listed order.
    for scenario in (test_image, test_audio, test_pdf):
        await scenario()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user