feat: support oneOf and allOf in tool input schemas (#24118)

## Why

Some connector golden schemas use JSON Schema composition keywords
beyond `anyOf`, specifically top-level or nested `oneOf` and `allOf`.
Codex currently needs to preserve those shapes when parsing MCP tool
input schemas so connector tools do not lose valid schema structure
during normalization.

To prevent an increased Responses API error rate, this PR will be merged
after the Responses API supports top-level `oneOf`/`allOf`.

## What Changed

- Adds `oneOf` and `allOf` support to `JsonSchema`, matching the
existing `anyOf` handling.
- Traverses `oneOf` and `allOf` anywhere schema children are visited,
including sanitization, definition reachability, description stripping,
and deep schema compaction.
- Adds a final large-schema compaction pass that prunes schema objects
containing `anyOf`, `oneOf`, or `allOf` to `{}` if earlier compaction
passes still leave the schema over budget.

## Validation
Golden schema token validation over `2,025` schemas under
`golden_schemas`, all parsed successfully. Token count is `o200k_base`
over compact JSON from `parse_tool_input_schema`.

| Percentile | Before PR | After oneOf/allOf | After pruning |
|---|---:|---:|---:|
| p0 | 9 | 9 | 9 |
| p10 | 63 | 64 | 64 |
| p25 | 86 | 87 | 87 |
| p50 | 125 | 128 | 128 |
| p75 | 203 | 206 | 206 |
| p90 | 327 | 333 | 333 |
| p95 | 460 | 473 | 473 |
| p99 | 763 | 779 | 779 |
| max | 891 | 955 | 955 |

Totals:

| Parser state | Total tokens |
|---|---:|
| Before PR | 345,713 |
| After oneOf/allOf | 352,686 |
| After pruning | 352,686 |

The pruning column matches the oneOf/allOf column for this corpus
because no parsed compact golden schema remains over the `4,000`
compact-byte budget after the earlier compaction passes.
This commit is contained in:
Celia Chen
2026-06-08 14:41:05 -07:00
committed by GitHub
Unverified
parent 56554904ba
commit b89d91f6ff
3 changed files with 270 additions and 10 deletions
+59 -6
View File
@@ -6,13 +6,15 @@ use std::collections::BTreeMap;
use std::collections::BTreeSet;
const DEFINITION_TABLE_KEYS: [&str; 2] = ["$defs", "definitions"];
const SCHEMA_CHILD_KEYS: [&str; 2] = ["items", "anyOf"];
const SCHEMA_CHILD_KEYS: [&str; 4] = ["items", "anyOf", "oneOf", "allOf"];
const COMPOSITION_SCHEMA_KEYS: [&str; 3] = ["anyOf", "oneOf", "allOf"];
/// Primitive JSON Schema type names we support in tool definitions.
///
/// This mirrors the OpenAI Structured Outputs subset for JSON Schema `type`:
/// string, number, boolean, integer, object, array, and null.
/// Keywords such as `enum`, `const`, and `anyOf` are modeled separately.
/// Keywords such as `enum`, `const`, `anyOf`, `oneOf`, and `allOf` are modeled
/// separately.
/// See <https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas>.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
@@ -61,6 +63,10 @@ pub struct JsonSchema {
pub additional_properties: Option<AdditionalProperties>,
#[serde(rename = "anyOf", skip_serializing_if = "Option::is_none")]
pub any_of: Option<Vec<JsonSchema>>,
#[serde(rename = "oneOf", skip_serializing_if = "Option::is_none")]
pub one_of: Option<Vec<JsonSchema>>,
#[serde(rename = "allOf", skip_serializing_if = "Option::is_none")]
pub all_of: Option<Vec<JsonSchema>>,
#[serde(rename = "$defs", skip_serializing_if = "Option::is_none")]
pub defs: Option<BTreeMap<String, JsonSchema>>,
#[serde(skip_serializing_if = "Option::is_none")]
@@ -85,6 +91,22 @@ impl JsonSchema {
}
}
pub fn one_of(variants: Vec<JsonSchema>, description: Option<String>) -> Self {
Self {
description,
one_of: Some(variants),
..Default::default()
}
}
pub fn all_of(variants: Vec<JsonSchema>, description: Option<String>) -> Self {
Self {
description,
all_of: Some(variants),
..Default::default()
}
}
pub fn boolean(description: Option<String>) -> Self {
Self::typed(JsonSchemaPrimitiveType::Boolean, description)
}
@@ -219,6 +241,7 @@ const LARGE_SCHEMA_COMPACTION_PASSES: &[LargeSchemaCompactionPass] = &[
strip_schema_descriptions,
drop_schema_definitions,
collapse_deep_schema_objects_from_root,
prune_schema_compositions,
];
fn collapse_deep_schema_objects_from_root(value: &mut JsonValue) {
@@ -397,6 +420,27 @@ fn collapse_deep_schema_objects(value: &mut JsonValue, depth: usize) {
}
}
fn prune_schema_compositions(value: &mut JsonValue) {
match value {
JsonValue::Array(values) => {
for value in values {
prune_schema_compositions(value);
}
}
JsonValue::Object(map) => {
if has_composition_keyword(map) {
*value = json!({});
return;
}
for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| {
prune_schema_compositions(value);
});
}
_ => {}
}
}
fn is_complex_schema_object(map: &serde_json::Map<String, JsonValue>) -> bool {
SCHEMA_CHILD_KEYS.iter().any(|key| map.contains_key(*key))
|| map.contains_key("properties")
@@ -404,10 +448,16 @@ fn is_complex_schema_object(map: &serde_json::Map<String, JsonValue>) -> bool {
|| map.contains_key("$ref")
}
fn has_composition_keyword(map: &serde_json::Map<String, JsonValue>) -> bool {
COMPOSITION_SCHEMA_KEYS
.into_iter()
.any(|key| map.contains_key(key))
}
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
/// schema representation. This function:
/// - Ensures every typed schema object has a `"type"` when required.
/// - Preserves explicit `anyOf`.
/// - Preserves explicit `anyOf`, `oneOf`, and `allOf`.
/// - Preserves `$ref` and reachable local `$defs` / `definitions`.
/// - Collapses `const` into single-value `enum`.
/// - Fills required child fields for object/array schema types, including
@@ -443,8 +493,10 @@ fn sanitize_json_schema(value: &mut JsonValue) {
if let Some(value) = map.get_mut("prefixItems") {
sanitize_json_schema(value);
}
if let Some(value) = map.get_mut("anyOf") {
sanitize_json_schema(value);
for key in COMPOSITION_SCHEMA_KEYS {
if let Some(value) = map.get_mut(key) {
sanitize_json_schema(value);
}
}
for table in DEFINITION_TABLE_KEYS {
sanitize_schema_table(map, table);
@@ -456,7 +508,8 @@ fn sanitize_json_schema(value: &mut JsonValue) {
let mut schema_types = normalized_schema_types(map);
if schema_types.is_empty() && (map.contains_key("$ref") || map.contains_key("anyOf")) {
if schema_types.is_empty() && (map.contains_key("$ref") || has_composition_keyword(map))
{
return;
}
+202
View File
@@ -728,6 +728,109 @@ fn parse_tool_input_schema_preserves_nested_any_of_property() {
);
}
#[test]
fn parse_tool_input_schema_preserves_nested_one_of_property() {
// Example schema shape:
// {
// "type": "object",
// "properties": {
// "query": {
// "oneOf": [
// { "const": "exact" },
// { "type": "number" }
// ]
// }
// }
// }
//
// Expected normalization behavior:
// - The nested `oneOf` is preserved.
// - Child variants are recursively sanitized, including `const` to `enum`.
let schema = parse_tool_input_schema(&serde_json::json!({
"type": "object",
"properties": {
"query": {
"oneOf": [
{ "const": "exact" },
{ "type": "number" }
]
}
}
}))
.expect("parse schema");
assert_eq!(
schema,
JsonSchema::object(
BTreeMap::from([(
"query".to_string(),
JsonSchema::one_of(
vec![
JsonSchema::string_enum(
vec![serde_json::json!("exact")],
/*description*/ None,
),
JsonSchema::number(/*description*/ None),
],
/*description*/ None,
),
)]),
/*required*/ None,
/*additional_properties*/ None
)
);
}
#[test]
fn parse_tool_input_schema_preserves_nested_all_of_property() {
// Example schema shape:
// {
// "type": "object",
// "properties": {
// "query": {
// "allOf": [
// { "type": "string" },
// { "description": "unrecognized by itself" }
// ]
// }
// }
// }
//
// Expected normalization behavior:
// - The nested `allOf` is preserved structurally rather than flattened.
// - Child variants are recursively sanitized.
let schema = parse_tool_input_schema(&serde_json::json!({
"type": "object",
"properties": {
"query": {
"allOf": [
{ "type": "string" },
{ "description": "unrecognized by itself" }
]
}
}
}))
.expect("parse schema");
assert_eq!(
schema,
JsonSchema::object(
BTreeMap::from([(
"query".to_string(),
JsonSchema::all_of(
vec![
JsonSchema::string(/*description*/ None),
JsonSchema::default(),
],
/*description*/ None,
),
)]),
/*required*/ None,
/*additional_properties*/ None
)
);
}
#[test]
fn parse_tool_input_schema_preserves_type_unions_without_rewriting_to_any_of() {
// Example schema shape:
@@ -1070,6 +1173,77 @@ fn parse_large_tool_input_schema_strips_descriptions_without_removing_descriptio
);
}
#[test]
fn parse_large_tool_input_schema_prunes_compositions_as_last_resort() {
for composition_key in super::COMPOSITION_SCHEMA_KEYS {
let variants = vec![
serde_json::json!({
"type": "string",
"enum": ["first ".repeat(400)]
}),
serde_json::json!({
"type": "string",
"enum": ["second ".repeat(400)]
}),
serde_json::json!({
"type": "string",
"enum": ["third ".repeat(400)]
}),
];
let mut choice = serde_json::Map::new();
choice.insert(
composition_key.to_string(),
serde_json::Value::Array(variants),
);
let schema = parse_tool_input_schema(&serde_json::json!({
"type": "object",
"properties": {
"choice": choice
}
}))
.expect("parse schema");
assert_eq!(
serde_json::to_value(schema).expect("serialize schema"),
serde_json::json!({
"type": "object",
"properties": {
"choice": {}
}
})
);
}
}
#[test]
fn parse_large_tool_input_schema_prunes_single_composition_variant_if_still_over_budget() {
let schema = parse_tool_input_schema(&serde_json::json!({
"type": "object",
"properties": {
"choice": {
"anyOf": [
{
"type": "string",
"enum": ["x".repeat(4_500)]
}
]
}
}
}))
.expect("parse schema");
assert_eq!(
serde_json::to_value(schema).expect("serialize schema"),
serde_json::json!({
"type": "object",
"properties": {
"choice": {}
}
})
);
}
#[test]
fn parse_large_tool_input_schema_preserves_object_enum_literal_descriptions() {
let schema = parse_tool_input_schema(&serde_json::json!({
@@ -1397,10 +1571,24 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() {
{"$ref": "#/$defs/Choice"},
{"type": "string"}
]
},
"exclusive_choice": {
"oneOf": [
{"$ref": "#/$defs/ExclusiveChoice"},
{"type": "integer"}
]
},
"combined": {
"allOf": [
{"$ref": "#/$defs/Combined"},
{"type": "object"}
]
}
},
"$defs": {
"Combined": {"type": "object"},
"Choice": {"type": "boolean"},
"ExclusiveChoice": {"type": "null"},
"Extra": {"type": "number"},
"Item": {"type": "string"},
"Unused": {"type": "null"}
@@ -1419,6 +1607,18 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() {
{"type": "string"}
]
},
"combined": {
"allOf": [
{"$ref": "#/$defs/Combined"},
{"type": "object", "properties": {}}
]
},
"exclusive_choice": {
"oneOf": [
{"$ref": "#/$defs/ExclusiveChoice"},
{"type": "integer"}
]
},
"items_holder": {
"type": "array",
"items": {"$ref": "#/$defs/Item"}
@@ -1430,7 +1630,9 @@ fn parse_tool_input_schema_collects_refs_from_schema_child_keywords() {
}
},
"$defs": {
"Combined": {"type": "object", "properties": {}},
"Choice": {"type": "boolean"},
"ExclusiveChoice": {"type": "null"},
"Extra": {"type": "number"},
"Item": {"type": "string"}
}
@@ -53,13 +53,18 @@
{
"pointer": "/properties/title/description",
"value": "Copied file title."
},
{
"pointer": "/properties/file/oneOf/0/type",
"value": "string"
},
{
"pointer": "/properties/metadata/allOf/0/properties/source/type",
"value": "string"
}
],
"expected_pruned": [],
"expected_dropped_fields": [
"/properties/file/oneOf",
"/properties/metadata/allOf"
]
"expected_dropped_fields": []
}
]
}