mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Parse YAML block scalars in SKILL.md frontmatter (#5863)
The frontmatter parser previously matched only single-line `key: value` pairs, so block scalar indicators (`|` literal, `>` folded, with chomping `-`/`+`) were silently truncated to the indicator character. Multi-line descriptions like `description: >\n ...` lost their content. Add `_parse_yaml_scalar_value()` which detects block scalar indicators, collects indented continuation lines, strips the common leading indentation, joins per scalar style (newlines for `|`, spaces for `>`), and applies chomping per the YAML 1.2 spec. Update `_extract_frontmatter()` to use the helper for unquoted values. Adds 15 unit tests covering literal/folded styles, all chomping variants, indentation handling, content containing colons, non-description fields, tab indentation, blank-line preservation, and a regression test for plain values. Fixes #5713. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
ad95f2f2fa
commit
19b2367366
@@ -1513,6 +1513,97 @@ YAML_INDENTED_KV_RE = re.compile(
|
||||
# must not start or end with a hyphen, and must not contain consecutive hyphens.
|
||||
VALID_NAME_RE = re.compile(r"^[a-z0-9]([a-z0-9]*-[a-z0-9])*[a-z0-9]*$")
|
||||
|
||||
# Block scalar indicator characters recognised by the lightweight YAML parser.
_BLOCK_SCALAR_INDICATORS = ("|", ">")

# A complete block scalar header: the style indicator, then an optional
# chomping indicator (``+``/``-``) and an optional indentation indicator
# (``1``-``9``) in either order, then an optional ``# comment``.
# Anything else after the indicator (e.g. ``>= 3 items``) is not a header.
_BLOCK_SCALAR_HEADER_RE = re.compile(
    r"^(?P<style>[|>])"
    r"(?:(?P<chomp_first>[+-])[1-9]?|[1-9](?P<chomp_last>[+-])?)?"
    r"(?:[ \t]+#.*)?[ \t]*$"
)


def _fold_block_lines(lines: list[str]) -> str:
    """Join de-indented block lines using YAML folded (``>``) semantics.

    Adjacent non-empty lines are joined with a single space. A run of *n*
    empty lines between (or before) non-empty lines becomes *n* line breaks,
    so paragraphs separated by a blank line stay on separate lines.
    """
    parts: list[str] = []
    pending_breaks = 0
    for line in lines:
        if not line:
            pending_breaks += 1
            continue
        if pending_breaks:
            parts.append("\n" * pending_breaks)
        elif parts:
            parts.append(" ")
        parts.append(line)
        pending_breaks = 0
    return "".join(parts)


def _parse_yaml_scalar_value(yaml_content: str, kv_match: re.Match[str]) -> str:
    """Resolve the scalar value for an unquoted YAML key-value match.

    If the captured value is a YAML block scalar header (``|`` or ``>``,
    optionally followed by chomping/indentation indicators and a comment),
    the function reads the subsequent indented continuation lines, strips
    their common leading indentation, and joins them according to the scalar
    style: literal (``|``) preserves line breaks, folded (``>``) joins
    adjacent lines with spaces and turns blank lines into line breaks.

    Chomping indicators are respected per YAML 1.2 §8.1.1.2:

    * ``-`` (strip) — final line break and trailing empty lines excluded
    * ``+`` (keep)  — final line break and any trailing empty lines preserved
    * default (clip) — trailing empty lines excluded; a literal scalar keeps
      its final line break, a folded scalar is returned without one (this
      matches the previous behaviour of this parser)

    Values that merely *start* with ``|`` or ``>`` but are not a valid block
    scalar header (for example ``>= 3 items``) are plain values and are
    returned as-is, like every other non-block-scalar value.

    Note: an explicit indentation indicator (e.g. ``|2``) is accepted in the
    header but not applied; indentation is always auto-detected from the
    common leading whitespace of the block.

    Args:
        yaml_content: The full frontmatter text that ``kv_match`` was found in.
        kv_match: A key-value match whose group 3 holds the unquoted value.

    Returns:
        The plain value, or the assembled block scalar content. A block scalar
        header with no content lines yields an empty string.
    """
    value: str = kv_match.group(3)

    if not value or value[0] not in _BLOCK_SCALAR_INDICATORS:
        return value

    header = _BLOCK_SCALAR_HEADER_RE.match(value.strip())
    if header is None:
        # Starts with an indicator character but is not a header: plain text.
        return value

    scalar_style = header.group("style")
    chomping = header.group("chomp_first") or header.group("chomp_last") or ""

    # Find the end of the header line; the block starts on the next line.
    header_line_end = yaml_content.find("\n", kv_match.end())
    if header_line_end < 0:
        # The header is the last line, so the block has no content at all.
        return ""

    raw_lines = yaml_content[header_line_end + 1 :].split("\n")
    if raw_lines[-1] == "":
        # Artifact of a terminating newline, not an actual blank line.
        raw_lines.pop()

    # Collect indented continuation lines (or blank lines within the block).
    block_lines: list[str] = []
    for raw_line in raw_lines:
        line = raw_line.rstrip("\r")  # tolerate CRLF line endings
        if not line or line.isspace():
            # Blank / whitespace-only lines are part of the block.
            block_lines.append("")
        elif line[0] in (" ", "\t"):
            block_lines.append(line)
        else:
            # Non-indented, non-blank line — end of the block.
            break

    # Separate trailing blank lines; only keep-chomping puts them back.
    trailing_blank_lines = 0
    while block_lines and block_lines[-1] == "":
        block_lines.pop()
        trailing_blank_lines += 1

    if not block_lines:
        return ""

    # Determine the common leading indentation across non-empty lines.
    # Only space/tab characters count as indentation (matches YAML semantics).
    common_indent = min(len(line) - len(line.lstrip(" \t")) for line in block_lines if line)
    normalized = [line[common_indent:] if line else "" for line in block_lines]

    parsed = "\n".join(normalized) if scalar_style == "|" else _fold_block_lines(normalized)

    if chomping == "+":
        # Keep: the final line break plus one break per trailing blank line.
        return parsed + "\n" * (1 + trailing_blank_lines)
    if chomping == "-":
        return parsed
    # Clip (default): literal gets a trailing newline, folded does not.
    return parsed + "\n" if scalar_style == "|" else parsed
|
||||
|
||||
|
||||
# Default system prompt template for advertising available skills to the model.
|
||||
# Use {skills} as the placeholder for the generated skills XML list.
|
||||
@@ -2879,7 +2970,9 @@ class FileSkillsSource(SkillsSource):
|
||||
|
||||
for kv_match in YAML_KV_RE.finditer(yaml_content):
|
||||
key = kv_match.group(1)
|
||||
value = kv_match.group(2) if kv_match.group(2) is not None else kv_match.group(3)
|
||||
value = (
|
||||
kv_match.group(2) if kv_match.group(2) is not None else _parse_yaml_scalar_value(yaml_content, kv_match)
|
||||
)
|
||||
|
||||
key_lower = key.lower()
|
||||
if key_lower == "name":
|
||||
|
||||
@@ -319,9 +319,7 @@ class TestDiscoverResourceFiles:
|
||||
refs = skill_dir / "references"
|
||||
refs.mkdir(parents=True)
|
||||
(refs / "doc.md").write_text("content", encoding="utf-8")
|
||||
resources = FileSkillsSource._discover_resource_files(
|
||||
str(skill_dir), directories=("references", "references")
|
||||
)
|
||||
resources = FileSkillsSource._discover_resource_files(str(skill_dir), directories=("references", "references"))
|
||||
assert resources == ["references/doc.md"]
|
||||
|
||||
def test_results_are_sorted(self, tmp_path: Path) -> None:
|
||||
@@ -1675,9 +1673,7 @@ class TestValidateAndNormalizeDirectoryNames:
|
||||
FileSkillsSource._validate_and_normalize_directory_names([" "])
|
||||
|
||||
def test_multiple_directories(self) -> None:
|
||||
result = FileSkillsSource._validate_and_normalize_directory_names(
|
||||
[".", "references", "assets", "scripts"]
|
||||
)
|
||||
result = FileSkillsSource._validate_and_normalize_directory_names([".", "references", "assets", "scripts"])
|
||||
assert result == [".", "references", "assets", "scripts"]
|
||||
|
||||
def test_default_resource_directories(self) -> None:
|
||||
@@ -2163,6 +2159,145 @@ class TestExtractFrontmatterEdgeCases:
|
||||
assert result.description == desc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: _extract_frontmatter block scalar parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractFrontmatterBlockScalars:
    """Exercise ``_extract_frontmatter`` with YAML block scalars (``|`` and ``>``)."""

    def test_literal_block_scalar(self) -> None:
        """A literal scalar keeps its line breaks and ends with one newline."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |\n"
            " Line one\n"
            " Line two\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Line one\nLine two\n"

    def test_folded_block_scalar(self) -> None:
        """A folded scalar joins its lines with single spaces."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: >\n"
            " This is a multi-line\n"
            " description block\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "This is a multi-line description block"

    def test_literal_strip_chomping(self) -> None:
        """``|-`` drops the final newline of a literal scalar."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |-\n"
            " No trailing newline\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "No trailing newline"

    def test_folded_strip_chomping(self) -> None:
        """``>-`` folds the lines and leaves no trailing newline."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: >-\n"
            " Folded with\n"
            " strip chomping\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Folded with strip chomping"

    def test_literal_keep_chomping(self) -> None:
        """``|+`` keeps the final newline of a literal scalar."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |+\n"
            " Keep trailing\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Keep trailing\n"

    def test_folded_keep_chomping(self) -> None:
        """``>+`` folds the lines and keeps the final newline."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: >+\n"
            " Keep trailing\n"
            " newline\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Keep trailing newline\n"

    def test_block_scalar_no_continuation_lines(self) -> None:
        """A block scalar header without content yields no usable frontmatter."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |\n"
            "license: MIT\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        # description becomes empty string which fails validation (empty/whitespace)
        assert frontmatter is None

    def test_block_scalar_varying_indentation(self) -> None:
        """The common leading indentation is removed from every content line."""
        markdown = "".join(
            [
                "---\n",
                "name: test-skill\n",
                "description: |\n",
                " Line with 4-space indent\n",
                " Line with 4-space indent\n",
                "---\n",
                "Body.",
            ]
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Line with 4-space indent\nLine with 4-space indent\n"

    def test_folded_block_scalar_real_skill_format(self) -> None:
        """End-to-end test matching the format used in .github/skills/ SKILL.md files."""
        markdown = "".join(
            [
                "---\n",
                "name: python-development\n",
                "description: >\n",
                " Coding standards, conventions, and patterns for developing Python code in the\n",
                " Agent Framework repository. Use this when writing or modifying Python source\n",
                " files in the python/ directory.\n",
                "---\n",
                "\n",
                "# Python Development Standards\n",
            ]
        )
        expected_description = (
            "Coding standards, conventions, and patterns for developing Python code in the "
            "Agent Framework repository. Use this when writing or modifying Python source "
            "files in the python/ directory."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == expected_description

    def test_block_scalar_with_other_fields_after(self) -> None:
        """Fields that follow a block scalar are still parsed."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: >\n"
            " A folded\n"
            " description\n"
            "license: MIT\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "A folded description"
        assert frontmatter.license == "MIT"

    def test_plain_value_unchanged(self) -> None:
        """Non-block-scalar values must not be affected by the block scalar logic."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: A simple description.\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "A simple description."

    def test_block_scalar_content_with_colons(self) -> None:
        """Lines inside a block scalar that look like YAML key-value pairs must be preserved verbatim."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |\n"
            " Some text with colon: in it\n"
            " Another: line here\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Some text with colon: in it\nAnother: line here\n"

    def test_block_scalar_on_license_field(self) -> None:
        """Block scalars should work on any field, not only description."""
        markdown = "".join(
            [
                "---\n",
                "name: test-skill\n",
                "description: A skill.\n",
                "license: >\n",
                " Custom license\n",
                " spanning multiple lines\n",
                "---\n",
                "Body.",
            ]
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.license == "Custom license spanning multiple lines"

    def test_block_scalar_tab_indentation(self) -> None:
        """Tab characters should count as indentation for block scalar continuation lines."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |\n"
            "\tTab-indented line one\n"
            "\tTab-indented line two\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "Tab-indented line one\nTab-indented line two\n"

    def test_block_scalar_blank_line_within_block(self) -> None:
        """Blank lines within a block scalar should be preserved as paragraph separators."""
        markdown = (
            "---\n"
            "name: test-skill\n"
            "description: |\n"
            " First paragraph\n"
            "\n"
            " Second paragraph\n"
            "---\n"
            "Body."
        )
        frontmatter = FileSkillsSource._extract_frontmatter(markdown, "test.md")
        assert frontmatter is not None
        assert frontmatter.description == "First paragraph\n\nSecond paragraph\n"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests: Skill spec fields (via SkillFrontmatter)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -5498,9 +5633,7 @@ class TestArrayStyleScriptArgs:
|
||||
return "ok"
|
||||
|
||||
assert isinstance(my_runner, SkillScriptRunner)
|
||||
skill = FileSkill(
|
||||
frontmatter=SkillFrontmatter(name="s", description="d"), content="c", path=f"{_ABS}/test"
|
||||
)
|
||||
skill = FileSkill(frontmatter=SkillFrontmatter(name="s", description="d"), content="c", path=f"{_ABS}/test")
|
||||
script = FileSkillScript(name="run.py", full_path=f"{_ABS}/test/run.py")
|
||||
result = my_runner(skill, script, args=["--flag", "value"])
|
||||
assert result == "ok"
|
||||
|
||||
Reference in New Issue
Block a user