[codex] Preserve skill descriptions outside model context (#29006)

## Why

Skill descriptions are used in model-visible lists: the default
available-skills catalog that supports implicit selection, and the
on-demand `skills.list` tool response used to discover orchestrator
skills. A single overlong description should not consume a
disproportionate share of either list.

Enforcing the 1024-character limit while loading or migrating skills is
the wrong boundary: it rejects otherwise-valid skills and discards
metadata that non-model consumers and full skill reads may need. Skill
metadata and `SKILL.md` content should remain intact; the cap belongs at
model-visible list rendering boundaries.

## What changed

- Preserve full `description` and `metadata.short-description` values
when loading skills.
- Preserve full external-agent command descriptions during
`source-command-*` migration instead of skipping commands solely because
their descriptions exceed 1024 characters.
- Preserve full normalized orchestrator descriptions in the underlying
skills catalog.
- Cap each description at 1024 Unicode characters when rendering the
default available-skills context in `codex-core-skills` and
`codex-skills-extension`.
- Apply the same cap when serializing descriptions in the model-visible
`skills.list` response.
- Render truncated descriptions as 1021 original characters plus `...`.
- Leave explicit `$skill` injection, `skills.read`, underlying metadata,
and on-disk `SKILL.md` files unchanged and full-fidelity.

## Implicit skill selection

Codex injects a bounded catalog containing each implicitly allowed
skill's name, description, and source locator, together with
instructions to use a skill when the task clearly matches its
description. The model makes that semantic choice; after selecting a
skill, it reads the full `SKILL.md` from its filesystem or provider
resource. Explicit `$skill` mentions remain a separate path that injects
the full skill instructions. For orchestrator skills, `skills.list`
provides bounded discovery metadata before `skills.read` returns the
full selected resource.

## Test plan

- `just test -p codex-core-skills`
- `just test -p codex-skills-extension`
- `just test -p codex-external-agent-migration`

The focused regressions verify that overlong metadata is preserved at
load and migration boundaries while default available-skills rendering
and `skills.list` output produce the 1021-character prefix plus `...`.
This commit is contained in:
charlesgong-openai
2026-06-19 12:47:53 -07:00
committed by GitHub
Unverified
parent abd901770e
commit 64bdeed9f7
8 changed files with 263 additions and 34 deletions
+2 -7
View File
@@ -684,13 +684,8 @@ async fn parse_skill_file(
validate_len(&base_name, MAX_NAME_LEN, "name")?;
validate_len(&name, MAX_QUALIFIED_NAME_LEN, "qualified name")?;
validate_len(&description, MAX_DESCRIPTION_LEN, "description")?;
if let Some(short_description) = short_description.as_deref() {
validate_len(
short_description,
MAX_SHORT_DESCRIPTION_LEN,
"metadata.short-description",
)?;
if description.is_empty() {
return Err(SkillParseError::MissingField("description"));
}
let resolved_path = canonicalize_for_skill_identity(fs, path).await;
+16 -12
View File
@@ -1573,7 +1573,7 @@ async fn preserves_block_scalar_body_while_repairing_other_fields() {
}
#[tokio::test]
async fn enforces_short_description_length_limits() {
async fn preserves_overlong_short_descriptions() {
let codex_home = tempfile::tempdir().expect("tempdir");
let skill_dir = codex_home.path().join("skills/demo");
fs::create_dir_all(&skill_dir).unwrap();
@@ -1585,15 +1585,13 @@ async fn enforces_short_description_length_limits() {
let cfg = make_config(&codex_home).await;
let outcome = load_skills_for_test(&cfg).await;
assert_eq!(outcome.skills.len(), 0);
assert_eq!(outcome.errors.len(), 1);
assert!(
outcome.errors[0]
.message
.contains("invalid metadata.short-description"),
"expected length error, got: {:?}",
outcome.errors.is_empty(),
"unexpected errors: {:?}",
outcome.errors
);
assert_eq!(outcome.skills.len(), 1);
assert_eq!(outcome.skills[0].short_description, Some(too_long));
}
#[tokio::test]
@@ -1625,7 +1623,7 @@ async fn skips_hidden_and_invalid() {
}
#[tokio::test]
async fn enforces_length_limits() {
async fn preserves_overlong_descriptions() {
let codex_home = tempfile::tempdir().expect("tempdir");
let max_desc = "\u{1F4A1}".repeat(MAX_DESCRIPTION_LEN);
write_skill(&codex_home, "max-len", "max-len", &max_desc);
@@ -1642,12 +1640,18 @@ async fn enforces_length_limits() {
let too_long_desc = "\u{1F4A1}".repeat(MAX_DESCRIPTION_LEN + 1);
write_skill(&codex_home, "too-long", "too-long", &too_long_desc);
let outcome = load_skills_for_test(&cfg).await;
assert_eq!(outcome.skills.len(), 1);
assert_eq!(outcome.errors.len(), 1);
assert!(
outcome.errors[0].message.contains("invalid description"),
"expected length error"
outcome.errors.is_empty(),
"unexpected errors: {:?}",
outcome.errors
);
assert_eq!(outcome.skills.len(), 2);
let too_long_skill = outcome
.skills
.iter()
.find(|skill| skill.name == "too-long")
.expect("too-long skill");
assert_eq!(too_long_skill.description, too_long_desc);
}
#[tokio::test]
+50 -4
View File
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Component;
@@ -16,6 +17,8 @@ use codex_utils_output_truncation::approx_token_count;
const DEFAULT_SKILL_METADATA_CHAR_BUDGET: usize = 8_000;
const SKILL_METADATA_CONTEXT_WINDOW_PERCENT: usize = 2;
const MAX_DEFAULT_CONTEXT_SKILL_DESCRIPTION_CHARS: usize = 1_024;
const TRUNCATED_SKILL_DESCRIPTION_SUFFIX: &str = "...";
const SKILL_DESCRIPTION_TRUNCATION_WARNING_THRESHOLD_CHARS: usize = 100;
const APPROX_BYTES_PER_TOKEN: usize = 4;
pub const SKILL_DESCRIPTION_TRUNCATED_WARNING: &str = "Skill descriptions were shortened to fit the skills context budget. Codex can still see every skill, but some descriptions are shorter. Disable unused skills or plugins to leave more room for the rest.";
@@ -446,7 +449,7 @@ impl SkillRenderReport {
struct SkillLine<'a> {
name: &'a str,
description: &'a str,
description: Cow<'a, str>,
path: String,
}
@@ -485,9 +488,10 @@ impl<'a> SkillLine<'a> {
}
fn with_path(skill: &'a SkillMetadata, path: String) -> Self {
let description = truncate_default_context_skill_description(skill.description.as_str());
Self {
name: skill.name.as_str(),
description: skill.description.as_str(),
description,
path,
}
}
@@ -505,7 +509,7 @@ impl<'a> SkillLine<'a> {
}
fn render_full(&self) -> String {
self.render_with_description(self.description)
self.render_with_description(self.description.as_ref())
}
fn render_minimum(&self) -> String {
@@ -524,7 +528,7 @@ impl<'a> SkillLine<'a> {
format!("- {}: (file: {})", self.name, self.path)
} else {
let end = self.rendered_description_prefix_len(description_chars);
let description = &self.description[..end];
let description = &self.description.as_ref()[..end];
format!("- {}: {} (file: {})", self.name, description, self.path)
}
}
@@ -538,6 +542,26 @@ impl<'a> SkillLine<'a> {
}
}
fn truncate_default_context_skill_description(description: &str) -> Cow<'_, str> {
if description
.char_indices()
.nth(MAX_DEFAULT_CONTEXT_SKILL_DESCRIPTION_CHARS)
.is_none()
{
return Cow::Borrowed(description);
}
let prefix_chars = MAX_DEFAULT_CONTEXT_SKILL_DESCRIPTION_CHARS
.saturating_sub(TRUNCATED_SKILL_DESCRIPTION_SUFFIX.chars().count());
let prefix_end = description
.char_indices()
.nth(prefix_chars)
.map_or(description.len(), |(index, _)| index);
let mut truncated = description[..prefix_end].to_string();
truncated.push_str(TRUNCATED_SKILL_DESCRIPTION_SUFFIX);
Cow::Owned(truncated)
}
impl<'a> DescriptionBudgetLine<'a> {
fn new(line: &'a SkillLine<'a>, budget: SkillMetadataBudget) -> Self {
let minimum_line = line.render_minimum();
@@ -1030,6 +1054,28 @@ mod tests {
);
}
#[test]
fn default_context_caps_descriptions_without_mutating_metadata() {
let description = "\u{1F4A1}".repeat(MAX_DEFAULT_CONTEXT_SKILL_DESCRIPTION_CHARS + 1);
let skill = make_skill_with_description("long-skill", SkillScope::Repo, &description);
let expected_description = "\u{1F4A1}".repeat(
MAX_DEFAULT_CONTEXT_SKILL_DESCRIPTION_CHARS
- TRUNCATED_SKILL_DESCRIPTION_SUFFIX.chars().count(),
) + TRUNCATED_SKILL_DESCRIPTION_SUFFIX;
let rendered = build_available_skills_from_metadata(
std::slice::from_ref(&skill),
SkillMetadataBudget::Characters(usize::MAX),
)
.expect("skill should render");
assert_eq!(skill.description, description);
assert_eq!(
rendered.skill_lines,
vec![expected_skill_line(&skill, &expected_description)]
);
}
#[test]
fn budgeted_rendering_truncates_descriptions_equally_before_omitting_skills() {
let alpha = make_skill_with_description("alpha-skill", SkillScope::Repo, "abcdef");
@@ -28,7 +28,6 @@ const MAX_RESOURCE_PAGES: usize = 10;
const MAX_ORCHESTRATOR_SKILLS: usize = 100;
const MAX_SKILL_NAME_CHARS: usize = 64;
const MAX_QUALIFIED_SKILL_NAME_CHARS: usize = 128;
const MAX_SKILL_DESCRIPTION_CHARS: usize = 1_024;
const MAX_SKILL_PACKAGE_URI_CHARS: usize = 1_024;
const MAX_SKILL_RESOURCE_URI_CHARS: usize = 2_048;
const MAX_SKILL_RESOURCE_CONTENT_BYTES: usize = 1024 * 1024;
@@ -308,12 +307,17 @@ fn normalized_label(value: &str, max_chars: usize) -> Option<String> {
}
fn normalized_description(value: &str) -> Option<String> {
normalized_single_line(value, MAX_SKILL_DESCRIPTION_CHARS).map(|value| {
let value = value.split_whitespace().collect::<Vec<_>>().join(" ");
if value.chars().any(char::is_control) {
return None;
}
Some(
value
.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
})
.replace('>', "&gt;"),
)
}
fn normalized_single_line(value: &str, max_chars: usize) -> Option<String> {
+26 -1
View File
@@ -1,3 +1,5 @@
use std::borrow::Cow;
use codex_utils_string::take_bytes_at_char_boundary;
use crate::catalog::SkillCatalog;
@@ -7,6 +9,8 @@ use crate::fragments::AvailableSkillsInstructions;
const MAX_AVAILABLE_SKILLS_BYTES: usize = 8_000;
const MAX_MAIN_PROMPT_BYTES: usize = 8_000;
const MAX_CATALOG_SKILL_DESCRIPTION_CHARS: usize = 1_024;
const TRUNCATED_SKILL_DESCRIPTION_SUFFIX: &str = "...";
pub(crate) const MAX_SKILL_NAME_BYTES: usize = 256;
pub(crate) const MAX_SKILL_PATH_BYTES: usize = 1_024;
@@ -31,7 +35,8 @@ pub(crate) fn available_skills_fragment(
.short_description
.as_deref()
.unwrap_or(entry.description.as_str());
let line = render_skill_line(entry, description);
let description = truncate_catalog_skill_description(description);
let line = render_skill_line(entry, description.as_ref());
let next_bytes = total_bytes.saturating_add(line.len());
if next_bytes > MAX_AVAILABLE_SKILLS_BYTES {
omitted = omitted.saturating_add(1);
@@ -54,6 +59,26 @@ pub(crate) fn available_skills_fragment(
Some(AvailableSkillsInstructions::from_skill_lines(skill_lines))
}
pub(crate) fn truncate_catalog_skill_description(description: &str) -> Cow<'_, str> {
if description
.char_indices()
.nth(MAX_CATALOG_SKILL_DESCRIPTION_CHARS)
.is_none()
{
return Cow::Borrowed(description);
}
let prefix_chars = MAX_CATALOG_SKILL_DESCRIPTION_CHARS
.saturating_sub(TRUNCATED_SKILL_DESCRIPTION_SUFFIX.chars().count());
let prefix_end = description
.char_indices()
.nth(prefix_chars)
.map_or(description.len(), |(index, _)| index);
let mut truncated = description[..prefix_end].to_string();
truncated.push_str(TRUNCATED_SKILL_DESCRIPTION_SUFFIX);
Cow::Owned(truncated)
}
fn render_skill_line(entry: &SkillCatalogEntry, description: &str) -> String {
let locator_kind = match &entry.authority.kind {
SkillSourceKind::Host => "file",
+2 -1
View File
@@ -8,6 +8,7 @@ use serde::Deserialize;
use serde::Serialize;
use crate::catalog::SkillCatalogEntry;
use crate::render::truncate_catalog_skill_description;
use crate::render::truncate_utf8_to_bytes;
use super::MAX_HANDLE_BYTES;
@@ -95,7 +96,7 @@ fn listed_skill(entry: SkillCatalogEntry) -> Option<ListedSkill> {
authority,
package: entry.id.0,
name: entry.name,
description: entry.description,
description: truncate_catalog_skill_description(&entry.description).into_owned(),
main_resource: entry.main_prompt.as_str().to_string(),
})
}
@@ -10,10 +10,14 @@ use codex_core_skills::SKILLS_INTRO_WITH_ABSOLUTE_PATHS;
use codex_core_skills::SkillLoadOutcome;
use codex_core_skills::SkillMetadata;
use codex_core_skills::injection::InjectedHostSkillPrompts;
use codex_extension_api::ConversationHistory;
use codex_extension_api::ExtensionData;
use codex_extension_api::ExtensionEventSink;
use codex_extension_api::ExtensionRegistryBuilder;
use codex_extension_api::NoopTurnItemEmitter;
use codex_extension_api::ThreadStartInput;
use codex_extension_api::ToolCall;
use codex_extension_api::ToolPayload;
use codex_extension_api::TurnInputContext;
use codex_protocol::capabilities::CapabilityRootLocation;
use codex_protocol::capabilities::SelectedCapabilityRoot;
@@ -23,6 +27,7 @@ use codex_protocol::protocol::SKILLS_INSTRUCTIONS_CLOSE_TAG;
use codex_protocol::protocol::SKILLS_INSTRUCTIONS_OPEN_TAG;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SkillScope;
use codex_protocol::protocol::TruncationPolicy;
use codex_protocol::user_input::UserInput;
use codex_skills_extension::SkillProviders;
use codex_skills_extension::SkillsExtensionConfig;
@@ -255,6 +260,128 @@ async fn selected_executor_catalog_is_context_and_selected_entrypoint_is_turn_in
Ok(())
}
#[tokio::test]
async fn default_context_truncates_catalog_descriptions() -> TestResult {
let description = "x".repeat(1_025);
let mut entry = test_entry(
SkillSourceKind::Orchestrator,
"codex_apps",
"orchestrator/long-description",
"skill://orchestrator/long-description/SKILL.md",
);
entry.description = description.clone();
let providers =
SkillProviders::new().with_orchestrator_provider(Arc::new(StaticSkillProvider {
catalog: SkillCatalog {
entries: vec![entry],
warnings: Vec::new(),
},
read_requests: Arc::new(Mutex::new(Vec::new())),
list_calls: None,
fail_first_list: false,
}));
let mut builder = ExtensionRegistryBuilder::new();
install_with_providers(&mut builder, providers, skills_extension_config);
let registry = builder.build();
let session_store = ExtensionData::new("session");
let thread_store = ExtensionData::new("thread");
let session_source = SessionSource::Cli;
let config = default_config();
registry.thread_lifecycle_contributors()[0]
.on_thread_start(ThreadStartInput {
config: &config,
session_source: &session_source,
persistent_thread_state_available: true,
environments: &[],
session_store: &session_store,
thread_store: &thread_store,
})
.await;
let fragments = registry.context_contributors()[0]
.contribute_thread_context(&session_store, &thread_store)
.await;
assert_eq!(1, fragments.len());
let rendered = fragments[0].text();
assert!(rendered.contains(&("x".repeat(1_021) + "...")));
assert!(!rendered.contains(&"x".repeat(1_024)));
assert!(!rendered.contains(&description));
Ok(())
}
#[tokio::test]
async fn skills_list_truncates_catalog_descriptions_in_tool_output() -> TestResult {
let description = "x".repeat(1_025);
let mut entry = test_entry(
SkillSourceKind::Orchestrator,
"codex_apps",
"orchestrator/long-description",
"skill://orchestrator/long-description/SKILL.md",
);
entry.description = description.clone();
let providers =
SkillProviders::new().with_orchestrator_provider(Arc::new(StaticSkillProvider {
catalog: SkillCatalog {
entries: vec![entry],
warnings: Vec::new(),
},
read_requests: Arc::new(Mutex::new(Vec::new())),
list_calls: None,
fail_first_list: false,
}));
let mut builder = ExtensionRegistryBuilder::new();
install_with_providers(&mut builder, providers, skills_extension_config);
let registry = builder.build();
let session_store = ExtensionData::new("session");
let thread_store = ExtensionData::new("thread");
let session_source = SessionSource::Cli;
let config = default_config();
registry.thread_lifecycle_contributors()[0]
.on_thread_start(ThreadStartInput {
config: &config,
session_source: &session_source,
persistent_thread_state_available: true,
environments: &[],
session_store: &session_store,
thread_store: &thread_store,
})
.await;
let tools = registry.tool_contributors()[0].tools(&session_store, &thread_store);
let list_tool = tools
.iter()
.find(|tool| tool.tool_name().name == "list")
.ok_or("skills.list tool should be registered")?;
let payload = ToolPayload::Function {
arguments: serde_json::json!({"authority": {"kind": "orchestrator"}}).to_string(),
};
let output = list_tool
.handle(ToolCall {
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: list_tool.tool_name(),
model: "gpt-test".to_string(),
truncation_policy: TruncationPolicy::Bytes(1_024),
conversation_history: ConversationHistory::default(),
turn_item_emitter: Arc::new(NoopTurnItemEmitter),
environments: Vec::new(),
payload: payload.clone(),
})
.await?;
let response = output
.post_tool_use_response("call-1", &payload)
.ok_or("skills.list should expose structured output")?;
let rendered_description = response["skills"][0]["description"]
.as_str()
.ok_or("skills.list response should include a description")?;
assert_eq!(rendered_description, "x".repeat(1_021) + "...");
assert_ne!(rendered_description, description);
Ok(())
}
#[tokio::test]
async fn orchestrator_catalog_snapshot_caches_failure() -> TestResult {
let list_calls = Arc::new(AtomicUsize::new(0));
+32 -5
View File
@@ -18,7 +18,6 @@ const EXTERNAL_AGENT_HOOKS_SUBDIR: &str = "hooks";
const EXTERNAL_AGENT_MIGRATED_HOOKS_SUBDIR: &str = "hooks";
const COMMAND_SKILL_PREFIX: &str = "source-command";
const MAX_SKILL_NAME_LEN: usize = 64;
const MAX_SKILL_DESCRIPTION_LEN: usize = 1024;
#[derive(Debug)]
struct ParsedDocument {
@@ -1130,14 +1129,11 @@ fn command_skill_name_if_supported(
return None;
}
let source_name = command_source_name(source_commands, source_file);
let description = command_skill_description(document, &source_name)?;
command_skill_description(document, &source_name)?;
let name = command_skill_name(source_commands, source_file);
if name.chars().count() > MAX_SKILL_NAME_LEN {
return None;
}
if description.chars().count() > MAX_SKILL_DESCRIPTION_LEN {
return None;
}
if has_unsupported_command_template_features(&document.body) {
return None;
}
@@ -1388,6 +1384,8 @@ mod tests {
use super::*;
use pretty_assertions::assert_eq;
const MAX_SKILL_DESCRIPTION_LEN: usize = 1024;
fn source_path(relative_path: &str) -> PathBuf {
Path::new("/repo")
.join(external_agent_config_dir())
@@ -1722,6 +1720,35 @@ command = "enabled-server"
assert!(command_skill_name_if_supported(&root, &file, &document).is_none());
}
#[test]
fn commands_with_overlong_descriptions_are_preserved() {
let root = source_path("commands");
let file = source_path("commands/review.md");
let description = "x".repeat(MAX_SKILL_DESCRIPTION_LEN + 1);
let document =
parse_document_content(&format!("---\ndescription: {description}\n---\nReview\n"));
assert_eq!(
command_skill_name_if_supported(&root, &file, &document),
Some("source-command-review".to_string())
);
let rendered = render_command_skill(
&document.body,
"source-command-review",
&description,
"review",
);
let rendered_document = parse_document_content(&rendered);
assert_eq!(
rendered_document
.frontmatter
.get("description")
.and_then(FrontmatterValue::as_scalar),
Some(description.as_str())
);
}
#[test]
fn commands_with_provider_runtime_expansion_are_skipped() {
let root = source_path("commands");