Skip to content

Commit 6688559

Browse files
authored
fix(skill): ignore complex YAML frontmatter to prevent parsing errors (agentscope-ai#1043)
## Description

Closes agentscope-ai#1030.

This PR updates the internal `SimpleYamlParser` to gracefully skip unsupported block-style complex structures (such as lists or nested objects) instead of throwing exceptions.

## Checklist

Please check the following items before the code is ready to be reviewed.

- [ ] Code has been formatted with `mvn spotless:apply`
- [ ] All tests are passing (`mvn test`)
- [ ] Javadoc comments are complete and follow project conventions
- [ ] Related documentation has been updated (e.g. links, examples, etc.)
- [ ] Code is ready for review
1 parent fb4d422 commit 6688559

2 files changed

Lines changed: 130 additions & 29 deletions

File tree

agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import java.util.Map;
2121
import java.util.regex.Matcher;
2222
import java.util.regex.Pattern;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
2325

2426
/**
2527
* Utility for parsing and generating Markdown files with YAML frontmatter.
@@ -54,6 +56,8 @@
5456
*/
5557
public class MarkdownSkillParser {
5658

59+
private static final Logger logger = LoggerFactory.getLogger(MarkdownSkillParser.class);
60+
5761
/**
5862
* Private constructor to prevent instantiation.
5963
*/
@@ -81,7 +85,6 @@ private MarkdownSkillParser() {}
8185
*
8286
* @param markdown Markdown content (may or may not have frontmatter)
8387
* @return ParsedMarkdown containing metadata and content
84-
* @throws IllegalArgumentException if YAML syntax is invalid
8588
*/
8689
public static ParsedMarkdown parse(String markdown) {
8790
if (markdown == null || markdown.isEmpty()) {
@@ -102,14 +105,8 @@ public static ParsedMarkdown parse(String markdown) {
102105
return new ParsedMarkdown(Map.of(), markdownContent);
103106
}
104107

105-
try {
106-
Map<String, String> metadata = SimpleYamlParser.parse(yamlContent);
107-
return new ParsedMarkdown(metadata, markdownContent);
108-
} catch (IllegalArgumentException e) {
109-
throw e;
110-
} catch (RuntimeException e) {
111-
throw new IllegalArgumentException("Invalid YAML frontmatter syntax", e);
112-
}
108+
Map<String, String> metadata = SimpleYamlParser.parse(yamlContent);
109+
return new ParsedMarkdown(metadata, markdownContent);
113110
}
114111

115112
/**
@@ -158,9 +155,14 @@ private static class SimpleYamlParser {
158155
/**
159156
* Parse YAML string into a map of key-value pairs.
160157
*
158+
* <p>This is a simplified parser designed for flat string-to-string mappings.
159+
* Block-style complex YAML structures (such as multi-line lists or indented
160+
* nested objects) are not supported and will be gracefully skipped.
161+
* However, flow-style inline structures (e.g., single-line JSON strings)
162+
* are treated as standard scalar values and will be parsed as raw strings.
163+
*
161164
* @param yaml YAML content to parse
162165
* @return Map of key-value pairs
163-
* @throws IllegalArgumentException if YAML syntax is invalid
164166
*/
165167
static Map<String, String> parse(String yaml) {
166168
Map<String, String> result = new LinkedHashMap<>();
@@ -184,19 +186,44 @@ static Map<String, String> parse(String yaml) {
184186

185187
Matcher matcher = KEY_VALUE_PATTERN.matcher(line.trim());
186188
if (!matcher.matches()) {
187-
throw new IllegalArgumentException(
188-
"Invalid YAML line (expected 'key: value' format): " + line);
189+
logger.debug(
190+
"Skipping unsupported YAML line (expected 'key: value' format): {}",
191+
line);
192+
continue;
189193
}
190194

191195
String key = matcher.group(1);
192-
String value = parseValue(matcher.group(2));
196+
String rawValue = matcher.group(2);
193197

194-
result.put(key, value);
198+
if (isBlockScalarModifier(rawValue)) {
199+
logger.debug(
200+
"Skipping key '{}': block-style values ('{}') are unsupported",
201+
key,
202+
rawValue.trim());
203+
continue;
204+
}
205+
206+
result.put(key, parseValue(rawValue));
195207
}
196208

197209
return result;
198210
}
199211

212+
/**
213+
* Check if the raw value is a YAML block scalar modifier ('|' or '>').
214+
*
215+
* @param rawValue The raw string captured after the colon
216+
* @return true if it is a block scalar modifier
217+
*/
218+
private static boolean isBlockScalarModifier(String rawValue) {
219+
if (rawValue == null) {
220+
return false;
221+
}
222+
223+
String trimmed = rawValue.trim();
224+
return "|".equals(trimmed) || ">".equals(trimmed);
225+
}
226+
200227
/**
201228
* Parse a YAML value, handling quoted strings.
202229
*

agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java

Lines changed: 89 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static org.junit.jupiter.api.Assertions.assertFalse;
2121
import static org.junit.jupiter.api.Assertions.assertNotNull;
2222
import static org.junit.jupiter.api.Assertions.assertNull;
23-
import static org.junit.jupiter.api.Assertions.assertThrows;
2423
import static org.junit.jupiter.api.Assertions.assertTrue;
2524

2625
import io.agentscope.core.skill.util.MarkdownSkillParser.ParsedMarkdown;
@@ -283,28 +282,103 @@ void testParseUnicodeCharacters() {
283282
class ErrorHandlingTests {
284283

285284
@Test
286-
@DisplayName("Should throw exception for invalid YAML")
285+
@DisplayName("Should gracefully ignore invalid YAML lines instead of throwing exception")
287286
void testInvalidYaml() {
288287
String markdown = "---\nname: test\nthis is not a valid line\n---\nContent";
289288

290-
IllegalArgumentException exception =
291-
assertThrows(
292-
IllegalArgumentException.class,
293-
() -> MarkdownSkillParser.parse(markdown));
294-
assertTrue(exception.getMessage().contains("Invalid YAML line"));
295-
assertTrue(exception.getMessage().contains("expected 'key: value' format"));
289+
MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
290+
Map<String, String> metadata = parsed.getMetadata();
291+
292+
assertEquals("test", metadata.get("name"));
293+
assertFalse(metadata.containsKey("this is not a valid line"));
294+
assertEquals("Content", parsed.getContent());
296295
}
297296

298297
@Test
299-
@DisplayName("Should throw exception for list format")
298+
@DisplayName("Should gracefully ignore list format instead of throwing exception")
300299
void testListFormat() {
301-
String markdown = "---\n- item1\n- item2\n---\nContent";
300+
String markdown = "---\nname: test_skill\n- item1\n- item2\n---\nContent";
301+
302+
MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
303+
Map<String, String> metadata = parsed.getMetadata();
304+
305+
assertEquals("test_skill", metadata.get("name"));
306+
assertFalse(metadata.containsKey("- item1"));
307+
assertFalse(metadata.containsKey("- item2"));
308+
}
309+
310+
@Test
311+
@DisplayName(
312+
"Should parse basic scalars and gracefully ignore complex YAML structures like"
313+
+ " lists or JSON")
314+
void testParseAndIgnoreComplexMetadata() {
315+
String markdown =
316+
"""
317+
---
318+
name: Agent Browser
319+
description: A fast Rust-based headless browser automation CLI
320+
read_when:
321+
- Automating web interactions
322+
- Extracting structured data from pages
323+
metadata: {"clawdbot":{"emoji":"🌐"}}
324+
allowed-tools: Bash(agent-browser:*)
325+
---
326+
327+
# Content
328+
This is the content.\
329+
""";
330+
331+
MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
332+
Map<String, String> metadata = parsed.getMetadata();
333+
334+
assertEquals("Agent Browser", metadata.get("name"));
335+
assertEquals(
336+
"A fast Rust-based headless browser automation CLI",
337+
metadata.get("description"));
338+
assertEquals("Bash(agent-browser:*)", metadata.get("allowed-tools"));
339+
340+
assertEquals("{\"clawdbot\":{\"emoji\":\"🌐\"}}", metadata.get("metadata"));
341+
342+
assertEquals("", metadata.get("read_when"));
343+
assertNull(metadata.get("- Automating web interactions"));
344+
345+
assertTrue(parsed.getContent().contains("# Content"));
346+
}
347+
348+
@Test
349+
@DisplayName(
350+
"Should gracefully skip keys with block-style modifiers (| or >) instead of"
351+
+ " recording them as literal values")
352+
void testSkipBlockStyleModifiers() {
353+
String markdown =
354+
"""
355+
---
356+
name: test_skill
357+
description: |
358+
This is a multi-line description.
359+
It should be ignored by the simple parser.
360+
summary: >
361+
This is a folded multi-line summary.
362+
It should also be ignored.
363+
version: "1.0"
364+
---
365+
Content\
366+
""";
367+
368+
MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
369+
Map<String, String> metadata = parsed.getMetadata();
370+
371+
assertEquals("test_skill", metadata.get("name"));
372+
assertEquals("1.0", metadata.get("version"));
373+
374+
assertNull(
375+
metadata.get("description"),
376+
"Block scalar modifier '|' should not be parsed as a literal value");
377+
assertNull(
378+
metadata.get("summary"),
379+
"Block scalar modifier '>' should not be parsed as a literal value");
302380

303-
IllegalArgumentException exception =
304-
assertThrows(
305-
IllegalArgumentException.class,
306-
() -> MarkdownSkillParser.parse(markdown));
307-
assertTrue(exception.getMessage().contains("Invalid YAML line"));
381+
assertFalse(metadata.containsKey(" This is a multi-line description."));
308382
}
309383
}
310384

0 commit comments

Comments
 (0)