mirror of
https://github.com/pchuan98/codex.git
synced 2026-07-01 00:31:56 +08:00
b1cbf622ad
## Why
The Python SDK currently exposes sandbox selection differently depending
on where it is used: thread lifecycle methods accept `SandboxMode`,
while turns accept the lower-level `SandboxPolicy` shape. For the common
case of choosing an access level, that leaks app-server wire details
into otherwise straightforward SDK usage.
This makes the common path explicit and discoverable: callers choose a
named sandbox preset once, using the same keyword on threads and turns.
The preset name `workspace_write` also makes the granted capability
clear at the callsite.
## What changed
- Added a root-level `Sandbox` enum with documented presets:
- `Sandbox.read_only`: read files without allowing writes.
- `Sandbox.workspace_write`: the normal default for projects with a
recorded trust decision; read files and write inside the workspace and
configured writable roots.
- `Sandbox.full_access`: run without filesystem access restrictions.
- Documented that omitting `sandbox=` delegates to app-server's
configured default, while explicit turn overrides remain sticky for
subsequent turns.
- Updated sync and async thread lifecycle and turn APIs to consistently
accept `sandbox=Sandbox...`, translating to the existing app-server
thread and turn representations internally.
- Updated the public API artifact generator so regenerated SDK wrappers
retain the friendly enum shape.
- Replaced low-level policy construction in Python docs, examples, and
the walkthrough notebook with the preset API.
- Added focused coverage for root exports, method signatures,
preset-to-wire mapping, and rejection of raw string sandbox inputs.
## API impact
High-level turn calls now use `sandbox=` instead of `sandbox_policy=`:
```python
from openai_codex import Codex, Sandbox
with Codex() as codex:
thread = codex.thread_start(sandbox=Sandbox.workspace_write)
result = thread.run("Review the diff only.", sandbox=Sandbox.read_only)
```
`thread_start(...)` already defaults to `ApprovalMode.auto_review`, so
normal writable usage is concise:
```python
with Codex() as codex:
thread = codex.thread_start(sandbox=Sandbox.workspace_write)
thread.run("Update the files in this workspace.")
```
With that combination, edits inside `cwd` and configured writable roots
run within the workspace-write sandbox. Operations that require
approval, such as edits outside those roots, are routed through auto
review. When `sandbox=` is omitted, app-server resolves its configured
default. A sandbox supplied to `run(...)` or `turn(...)` applies to that
turn and subsequent turns.
## Test coverage
- `sdk/python/tests/test_public_api_signatures.py` covers the public
export and parameter names, including the default approval mode.
- `sdk/python/tests/test_public_api_runtime_behavior.py` covers preset
mappings to the existing wire types and raw string rejection.
204 lines
6.4 KiB
Python
204 lines
6.4 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
import openai_codex.api as public_api_module
|
|
from openai_codex.api import (
|
|
ApprovalMode,
|
|
AsyncCodex,
|
|
Codex,
|
|
Sandbox,
|
|
)
|
|
from openai_codex.generated.v2_all import TurnStartParams
|
|
from openai_codex.models import InitializeResponse
|
|
|
|
ROOT = Path(__file__).resolve().parents[1]
|
|
|
|
|
|
def _approval_settings(params: list[Any]) -> list[dict[str, object]]:
|
|
"""Return serialized approval settings from captured Pydantic params."""
|
|
return [
|
|
{
|
|
key: value
|
|
for key, value in param.model_dump(
|
|
by_alias=True,
|
|
exclude_none=True,
|
|
mode="json",
|
|
).items()
|
|
if key in {"approvalPolicy", "approvalsReviewer"}
|
|
}
|
|
for param in params
|
|
]
|
|
|
|
|
|
def test_codex_init_failure_closes_client(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
closed: list[bool] = []
|
|
|
|
class FakeClient:
|
|
def __init__(self, config=None) -> None: # noqa: ANN001,ARG002
|
|
self._closed = False
|
|
|
|
def start(self) -> None:
|
|
return None
|
|
|
|
def initialize(self) -> InitializeResponse:
|
|
return InitializeResponse.model_validate({})
|
|
|
|
def close(self) -> None:
|
|
self._closed = True
|
|
closed.append(True)
|
|
|
|
monkeypatch.setattr(public_api_module, "AppServerClient", FakeClient)
|
|
|
|
with pytest.raises(RuntimeError, match="missing required metadata"):
|
|
Codex()
|
|
|
|
assert closed == [True]
|
|
|
|
|
|
def test_async_codex_init_failure_closes_client() -> None:
|
|
async def scenario() -> None:
|
|
codex = AsyncCodex()
|
|
close_calls = 0
|
|
|
|
async def fake_start() -> None:
|
|
return None
|
|
|
|
async def fake_initialize() -> InitializeResponse:
|
|
return InitializeResponse.model_validate({})
|
|
|
|
async def fake_close() -> None:
|
|
nonlocal close_calls
|
|
close_calls += 1
|
|
|
|
codex._client.start = fake_start # type: ignore[method-assign]
|
|
codex._client.initialize = fake_initialize # type: ignore[method-assign]
|
|
codex._client.close = fake_close # type: ignore[method-assign]
|
|
|
|
with pytest.raises(RuntimeError, match="missing required metadata"):
|
|
await codex.models()
|
|
|
|
assert close_calls == 1
|
|
assert codex._initialized is False
|
|
assert codex._init is None
|
|
|
|
asyncio.run(scenario())
|
|
|
|
|
|
def test_async_codex_initializes_only_once_under_concurrency() -> None:
|
|
async def scenario() -> None:
|
|
codex = AsyncCodex()
|
|
start_calls = 0
|
|
initialize_calls = 0
|
|
ready = asyncio.Event()
|
|
|
|
async def fake_start() -> None:
|
|
nonlocal start_calls
|
|
start_calls += 1
|
|
|
|
async def fake_initialize() -> InitializeResponse:
|
|
nonlocal initialize_calls
|
|
initialize_calls += 1
|
|
ready.set()
|
|
await asyncio.sleep(0.02)
|
|
return InitializeResponse.model_validate(
|
|
{
|
|
"userAgent": "codex-cli/1.2.3",
|
|
"serverInfo": {"name": "codex-cli", "version": "1.2.3"},
|
|
}
|
|
)
|
|
|
|
async def fake_model_list(include_hidden: bool = False): # noqa: ANN202,ARG001
|
|
await ready.wait()
|
|
return object()
|
|
|
|
codex._client.start = fake_start # type: ignore[method-assign]
|
|
codex._client.initialize = fake_initialize # type: ignore[method-assign]
|
|
codex._client.model_list = fake_model_list # type: ignore[method-assign]
|
|
|
|
await asyncio.gather(codex.models(), codex.models())
|
|
|
|
assert start_calls == 1
|
|
assert initialize_calls == 1
|
|
|
|
asyncio.run(scenario())
|
|
|
|
|
|
def _approval_mode_turn_params(approval_mode: ApprovalMode) -> TurnStartParams:
|
|
"""Build real generated turn params from one public approval mode."""
|
|
approval_policy, approvals_reviewer = public_api_module._approval_mode_settings(approval_mode)
|
|
return TurnStartParams(
|
|
thread_id="thread-1",
|
|
input=[],
|
|
approval_policy=approval_policy,
|
|
approvals_reviewer=approvals_reviewer,
|
|
)
|
|
|
|
|
|
def test_approval_modes_serialize_to_expected_start_params() -> None:
|
|
"""ApprovalMode should map to the app-server params sent for new work."""
|
|
assert {
|
|
mode.value: _approval_settings([_approval_mode_turn_params(mode)])[0]
|
|
for mode in ApprovalMode
|
|
} == {
|
|
"deny_all": {"approvalPolicy": "never"},
|
|
"auto_review": {
|
|
"approvalPolicy": "on-request",
|
|
"approvalsReviewer": "auto_review",
|
|
},
|
|
}
|
|
|
|
|
|
def test_unknown_approval_mode_is_rejected() -> None:
|
|
"""Invalid approval modes should fail before params are constructed."""
|
|
with pytest.raises(ValueError, match="deny_all, auto_review"):
|
|
public_api_module._approval_mode_settings("allow_all") # type: ignore[arg-type]
|
|
|
|
|
|
def test_sandbox_presets_serialize_for_threads_and_turns() -> None:
|
|
"""One public sandbox enum should map to both stable wire representations."""
|
|
assert {
|
|
sandbox.name: public_api_module._sandbox_mode(sandbox).value for sandbox in Sandbox
|
|
} == {
|
|
"read_only": "read-only",
|
|
"workspace_write": "workspace-write",
|
|
"full_access": "danger-full-access",
|
|
}
|
|
assert {
|
|
sandbox.name: public_api_module._sandbox_policy(sandbox).model_dump(
|
|
by_alias=True,
|
|
mode="json",
|
|
)
|
|
for sandbox in Sandbox
|
|
} == {
|
|
"read_only": {"networkAccess": False, "type": "readOnly"},
|
|
"workspace_write": {
|
|
"excludeSlashTmp": False,
|
|
"excludeTmpdirEnvVar": False,
|
|
"networkAccess": False,
|
|
"type": "workspaceWrite",
|
|
"writableRoots": [],
|
|
},
|
|
"full_access": {"type": "dangerFullAccess"},
|
|
}
|
|
|
|
|
|
def test_raw_sandbox_strings_are_rejected() -> None:
|
|
"""Callers should use the discoverable enum rather than memorizing values."""
|
|
with pytest.raises(ValueError, match="Sandbox\\.workspace_write"):
|
|
public_api_module._sandbox_mode("workspace") # type: ignore[arg-type]
|
|
|
|
|
|
def test_retry_examples_compare_status_with_enum() -> None:
|
|
for path in (
|
|
ROOT / "examples" / "10_error_handling_and_retry" / "sync.py",
|
|
ROOT / "examples" / "10_error_handling_and_retry" / "async.py",
|
|
):
|
|
source = path.read_text()
|
|
assert '== "failed"' not in source
|
|
assert "TurnStatus.failed" in source
|