diff --git a/sdk/python/docs/api-reference.md b/sdk/python/docs/api-reference.md index f003a2851..f253185dc 100644 --- a/sdk/python/docs/api-reference.md +++ b/sdk/python/docs/api-reference.md @@ -245,6 +245,9 @@ Input = list[InputItem] | InputItem RunInput = Input | str ``` +Use `ImageInput` with a base64-encoded `data:image/...` URL. HTTP and HTTPS image URLs are +deprecated; download remote images and pass their local paths with `LocalImageInput` instead. + Use a plain `str` as shorthand for `TextInput(...)` anywhere a turn input is accepted: `thread.run("...")`, `thread.turn("...")`, and `turn.steer("...")`. diff --git a/sdk/python/examples/07_image_and_text/async.py b/sdk/python/examples/07_image_and_text/async.py index c3237e320..8673e455a 100644 --- a/sdk/python/examples/07_image_and_text/async.py +++ b/sdk/python/examples/07_image_and_text/async.py @@ -5,7 +5,7 @@ _EXAMPLES_ROOT = Path(__file__).resolve().parents[1] if str(_EXAMPLES_ROOT) not in sys.path: sys.path.insert(0, str(_EXAMPLES_ROOT)) -from _bootstrap import ensure_local_sdk_src, runtime_config +from _bootstrap import ensure_local_sdk_src, generated_sample_image_data_url, runtime_config ensure_local_sdk_src() @@ -13,7 +13,7 @@ import asyncio from openai_codex import AsyncCodex, ImageInput, TextInput -REMOTE_IMAGE_URL = "https://raw.githubusercontent.com/github/explore/main/topics/python/python.png" +IMAGE_DATA_URL = generated_sample_image_data_url() async def main() -> None: @@ -24,7 +24,7 @@ async def main() -> None: turn = await thread.turn( [ TextInput("What is in this image? Give 3 bullets."), - ImageInput(REMOTE_IMAGE_URL), + ImageInput(IMAGE_DATA_URL), ] ) result = await turn.run() diff --git a/sdk/python/examples/07_image_and_text/sync.py b/sdk/python/examples/07_image_and_text/sync.py index f7402f18c..1b20f8462 100644 --- a/sdk/python/examples/07_image_and_text/sync.py +++ b/sdk/python/examples/07_image_and_text/sync.py @@ -5,20 +5,20 @@ _EXAMPLES_ROOT = Path(__file__).resolve().parents[1] if str(_EXAMPLES_ROOT) not in sys.path: sys.path.insert(0, str(_EXAMPLES_ROOT)) -from _bootstrap import ensure_local_sdk_src, runtime_config +from _bootstrap import ensure_local_sdk_src, generated_sample_image_data_url, runtime_config ensure_local_sdk_src() from openai_codex import Codex, ImageInput, TextInput -REMOTE_IMAGE_URL = "https://raw.githubusercontent.com/github/explore/main/topics/python/python.png" +IMAGE_DATA_URL = generated_sample_image_data_url() with Codex(config=runtime_config()) as codex: thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"}) result = thread.turn( [ TextInput("What is in this image? Give 3 bullets."), - ImageInput(REMOTE_IMAGE_URL), + ImageInput(IMAGE_DATA_URL), ] ).run() diff --git a/sdk/python/examples/README.md b/sdk/python/examples/README.md index 719fb29a4..a86580cc7 100644 --- a/sdk/python/examples/README.md +++ b/sdk/python/examples/README.md @@ -72,7 +72,7 @@ python examples/01_quickstart_constructor/async.py - `06_thread_lifecycle_and_controls/` - thread lifecycle + control calls - `07_image_and_text/` - - remote image URL + text multimodal turn + - image data URL + text multimodal turn - `08_local_image_and_text/` - local image + text multimodal turn using a generated temporary sample image - `09_async_parity/` diff --git a/sdk/python/examples/_bootstrap.py b/sdk/python/examples/_bootstrap.py index fde7a32c0..88039f4b9 100644 --- a/sdk/python/examples/_bootstrap.py +++ b/sdk/python/examples/_bootstrap.py @@ -1,5 +1,6 @@ from __future__ import annotations +import base64 import contextlib import importlib.util import sys @@ -95,6 +96,11 @@ def _generated_sample_png_bytes() -> bytes: ) +def generated_sample_image_data_url() -> str: + encoded = base64.b64encode(_generated_sample_png_bytes()).decode("ascii") + return f"data:image/png;base64,{encoded}" + + @contextlib.contextmanager def temporary_sample_image_path() -> Iterator[Path]: with tempfile.TemporaryDirectory(prefix="codex-python-example-image-") as temp_root: diff --git a/sdk/python/notebooks/sdk_walkthrough.ipynb b/sdk/python/notebooks/sdk_walkthrough.ipynb index 1bb9bf731..3f84cf067 100644 --- a/sdk/python/notebooks/sdk_walkthrough.ipynb +++ b/sdk/python/notebooks/sdk_walkthrough.ipynb @@ -103,7 +103,7 @@ "outputs": [], "source": [ "# Cell 2: imports (public only)\n", - "from _bootstrap import server_label\n", + "from _bootstrap import generated_sample_image_data_url, server_label\n", "from openai_codex import (\n", " AsyncCodex,\n", " Codex,\n", @@ -349,14 +349,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Cell 6: multimodal with remote image\n", - "remote_image_url = 'https://raw.githubusercontent.com/github/explore/main/topics/python/python.png'\n", + "# Cell 6: multimodal with an image data URL\n", + "image_data_url = generated_sample_image_data_url()\n", "\n", "with Codex() as codex:\n", " thread = codex.thread_start(model='gpt-5.4', config={'model_reasoning_effort': 'high'})\n", " result = thread.turn([\n", " TextInput('What do you see in this image? 3 bullets.'),\n", - " ImageInput(remote_image_url),\n", + " ImageInput(image_data_url),\n", " ]).run()\n", " print('status:', result.status)\n", " print(result.final_response)\n" diff --git a/sdk/python/src/openai_codex/_inputs.py b/sdk/python/src/openai_codex/_inputs.py index a6e5e7b52..0a1fbe110 100644 --- a/sdk/python/src/openai_codex/_inputs.py +++ b/sdk/python/src/openai_codex/_inputs.py @@ -14,7 +14,7 @@ class TextInput: @dataclass(slots=True) class ImageInput: - """Remote image URL supplied as turn input.""" + """Image data URL supplied as turn input.""" url: str diff --git a/sdk/python/tests/test_app_server_inputs.py b/sdk/python/tests/test_app_server_inputs.py index 56505e481..29b8fc772 100644 --- a/sdk/python/tests/test_app_server_inputs.py +++ b/sdk/python/tests/test_app_server_inputs.py @@ -1,40 +1,45 @@ from __future__ import annotations +import base64 + from app_server_harness import AppServerHarness from app_server_helpers import TINY_PNG_BYTES from openai_codex import Codex, ImageInput, LocalImageInput, SkillInput, TextInput -def test_remote_image_input_reaches_responses_api( +def test_data_url_image_input_reaches_responses_api( tmp_path, ) -> None: - """Remote image inputs should survive the SDK and app-server boundary.""" - remote_image_url = "https://example.com/codex.png" + """Data URL image inputs should survive the SDK and app-server boundary.""" + image_data_url = "data:image/png;base64," + base64.b64encode(TINY_PNG_BYTES).decode("ascii") with AppServerHarness(tmp_path) as harness: harness.responses.enqueue_assistant_message( - "remote image received", - response_id="remote-image", + "data URL image received", + response_id="data-url-image", ) with Codex(config=harness.app_server_config()) as codex: result = codex.thread_start().run( [ - TextInput("Describe the remote image."), - ImageInput(remote_image_url), + TextInput("Describe the data URL image."), + ImageInput(image_data_url), ] ) request = harness.responses.single_request() assert { "final_response": result.final_response, - "contains_user_prompt": "Describe the remote image." in request.message_input_texts("user"), - "image_urls": request.message_image_urls("user"), + "contains_user_prompt": "Describe the data URL image." + in request.message_input_texts("user"), + "image_url_is_png_data_url": request.message_image_urls("user")[-1].startswith( + "data:image/png;base64," + ), } == { - "final_response": "remote image received", + "final_response": "data URL image received", "contains_user_prompt": True, - "image_urls": [remote_image_url], + "image_url_is_png_data_url": True, }