Skip to content

Commit 74d7deb

Browse files
committed
feat(skill): add YAML auto-repair for unquoted colons in frontmatter
Enhance MarkdownSkillParser to automatically repair YAML frontmatter containing unquoted colon-space patterns in scalar values. When SnakeYAML fails to parse, the parser now attempts to quote problematic values before giving up. Refs #1442
1 parent 13a7167 commit 74d7deb

2 files changed

Lines changed: 238 additions & 2 deletions

File tree

agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,22 @@ private static Map<String, Object> parseYamlMetadata(String yamlContent) {
154154
try {
155155
loaded = createParserYaml().load(yamlContent);
156156
} catch (RuntimeException e) {
157-
logger.debug("Failed to parse YAML frontmatter, returning empty metadata", e);
158-
return Map.of();
157+
String repaired = repairYamlWithUnquotedColons(yamlContent);
158+
if (!repaired.equals(yamlContent)) {
159+
try {
160+
loaded = createParserYaml().load(repaired);
161+
logger.warn(
162+
"YAML frontmatter contained unquoted colons and was auto-repaired. "
163+
+ "Consider quoting scalar values containing ': ': {}",
164+
yamlContent.substring(0, Math.min(80, yamlContent.length())));
165+
} catch (RuntimeException e2) {
166+
logger.debug("Failed to repair YAML frontmatter, returning empty metadata", e2);
167+
return Map.of();
168+
}
169+
} else {
170+
logger.debug("Failed to parse YAML frontmatter, returning empty metadata", e);
171+
return Map.of();
172+
}
159173
}
160174

161175
if (loaded == null) {
@@ -182,6 +196,99 @@ private static Map<String, Object> parseYamlMetadata(String yamlContent) {
182196
return metadata;
183197
}
184198

199+
/**
200+
* Attempts to repair YAML content that contains unquoted colons in scalar values.
201+
*
202+
* <p>This handles the common case where a value contains patterns like "key:" that YAML
203+
* interprets as mapping keys, for example:
204+
* <pre>
205+
* description: test, node: cannot find EDI partner
206+
* </pre>
207+
*
208+
* <p>The repair strategy wraps values in double quotes when they contain ": " patterns
209+
* that would otherwise be parsed as key-value separators.
210+
*
211+
* @param yamlContent The original YAML content that failed to parse
212+
* @return Repaired YAML content, or the original if no repair was possible
213+
*/
214+
private static String repairYamlWithUnquotedColons(String yamlContent) {
215+
StringBuilder result = new StringBuilder();
216+
String[] lines = yamlContent.split("\n", -1);
217+
218+
for (String line : lines) {
219+
int firstColon = line.indexOf(':');
220+
if (firstColon > 0 && line.length() > firstColon + 1) {
221+
String keyPart = line.substring(0, firstColon);
222+
String valuePart = line.substring(firstColon + 1);
223+
224+
String trimmedKey = keyPart.trim();
225+
if (!trimmedKey.isEmpty() && !trimmedKey.contains(" ")) {
226+
if (needsQuoting(valuePart)) {
227+
String repairedValue = quoteValue(valuePart);
228+
line = keyPart + ":" + repairedValue;
229+
}
230+
}
231+
}
232+
result.append(line).append('\n');
233+
}
234+
235+
if (result.length() > 0) {
236+
result.setLength(result.length() - 1);
237+
}
238+
return result.toString();
239+
}
240+
241+
/**
242+
* Checks if a YAML value needs quoting because it contains unquoted colon-space patterns.
243+
*/
244+
private static boolean needsQuoting(String value) {
245+
String trimmed = value.trim();
246+
if (trimmed.isEmpty()) {
247+
return false;
248+
}
249+
250+
if ((trimmed.startsWith("\"") && trimmed.endsWith("\""))
251+
|| (trimmed.startsWith("'") && trimmed.endsWith("'"))) {
252+
return false;
253+
}
254+
255+
return findUnquotedColonSpace(trimmed) >= 0;
256+
}
257+
258+
/**
259+
* Finds the index of ": " that is not inside quotes.
260+
*
261+
* @return Index of the unquoted ": " or -1 if none found
262+
*/
263+
private static int findUnquotedColonSpace(String value) {
264+
boolean inDoubleQuotes = false;
265+
boolean inSingleQuotes = false;
266+
267+
for (int i = 0; i < value.length() - 1; i++) {
268+
char c = value.charAt(i);
269+
if (c == '"' && !inSingleQuotes) {
270+
inDoubleQuotes = !inDoubleQuotes;
271+
} else if (c == '\'' && !inDoubleQuotes) {
272+
inSingleQuotes = !inSingleQuotes;
273+
} else if (!inDoubleQuotes
274+
&& !inSingleQuotes
275+
&& c == ':'
276+
&& value.charAt(i + 1) == ' ') {
277+
return i;
278+
}
279+
}
280+
return -1;
281+
}
282+
283+
/**
284+
* Quotes a YAML value in double quotes, escaping any internal double quotes and backslashes.
285+
*/
286+
private static String quoteValue(String value) {
287+
String trimmed = value.trim();
288+
String escaped = trimmed.replace("\\", "\\\\").replace("\"", "\\\"");
289+
return " \"" + escaped + "\"";
290+
}
291+
185292
private static LoaderOptions createLoaderOptions() {
186293
LoaderOptions options = new LoaderOptions();
187294
options.setAllowDuplicateKeys(false);

agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,4 +730,133 @@ void testMetadataImmutable() {
730730
() -> parsed.getMetadata().put("description", "desc"));
731731
}
732732
}
733+
734+
@Nested
735+
@DisplayName("YAML Auto-Repair Tests")
736+
class YamlAutoRepairTests {
737+
738+
@Test
739+
@DisplayName("Should auto-repair description with unquoted colons")
740+
void testAutoRepairUnquotedColons() {
741+
String markdown =
742+
"---\n"
743+
+ "name: testskils\n"
744+
+ "description: 测试skills, node: 无法找到EDI Partner、EDI Partner不存在\n"
745+
+ "---\n"
746+
+ "# Skill Content";
747+
748+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
749+
750+
assertNotNull(parsed);
751+
assertTrue(parsed.hasFrontmatter());
752+
assertEquals("testskils", parsed.getMetadata().get("name"));
753+
String description = (String) parsed.getMetadata().get("description");
754+
assertNotNull(description);
755+
assertTrue(description.contains("node:"));
756+
assertTrue(description.contains("无法找到EDI Partner"));
757+
}
758+
759+
@Test
760+
@DisplayName("Should auto-repair description with error message containing colon")
761+
void testAutoRepairErrorMessageWithColon() {
762+
String markdown =
763+
"---\n"
764+
+ "name: edi-skill\n"
765+
+ "description: When error contains: Can't find the EDI Customer setup\n"
766+
+ "---\n"
767+
+ "# Content";
768+
769+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
770+
771+
assertTrue(parsed.hasFrontmatter());
772+
String description = (String) parsed.getMetadata().get("description");
773+
assertNotNull(description);
774+
assertTrue(description.contains("Can't find the EDI Customer setup"));
775+
}
776+
777+
@Test
778+
@DisplayName("Should handle already quoted values without double-quoting")
779+
void testAlreadyQuotedValuesNotDoubleQuoted() {
780+
String markdown =
781+
"---\n"
782+
+ "name: test\n"
783+
+ "description: \"Already quoted: with colon\"\n"
784+
+ "---\n"
785+
+ "# Content";
786+
787+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
788+
789+
assertTrue(parsed.hasFrontmatter());
790+
assertEquals("Already quoted: with colon", parsed.getMetadata().get("description"));
791+
}
792+
793+
@Test
794+
@DisplayName("Should handle multiple fields with unquoted colons")
795+
void testMultipleFieldsWithUnquotedColons() {
796+
String markdown =
797+
"---\n"
798+
+ "name: multi-colon\n"
799+
+ "description: Error: something failed, detail: node: not found\n"
800+
+ "example: status: error, code: 500\n"
801+
+ "---\n"
802+
+ "# Content";
803+
804+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
805+
806+
assertTrue(parsed.hasFrontmatter());
807+
String description = (String) parsed.getMetadata().get("description");
808+
assertNotNull(description);
809+
assertTrue(description.contains("Error:"));
810+
assertTrue(description.contains("detail:"));
811+
String example = (String) parsed.getMetadata().get("example");
812+
assertNotNull(example);
813+
assertTrue(example.contains("status:"));
814+
assertTrue(example.contains("code:"));
815+
}
816+
817+
@Test
818+
@DisplayName("Should still parse valid YAML without repair")
819+
void testValidYamlNoRepairNeeded() {
820+
String markdown =
821+
"---\n"
822+
+ "name: valid-yaml\n"
823+
+ "description: A normal description without colons\n"
824+
+ "version: 1.0.0\n"
825+
+ "---\n"
826+
+ "# Content";
827+
828+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
829+
830+
assertTrue(parsed.hasFrontmatter());
831+
assertEquals("valid-yaml", parsed.getMetadata().get("name"));
832+
assertEquals(
833+
"A normal description without colons", parsed.getMetadata().get("description"));
834+
assertEquals("1.0.0", parsed.getMetadata().get("version"));
835+
}
836+
837+
@Test
838+
@DisplayName("Should handle Chinese text with colons")
839+
void testChineseTextWithColons() {
840+
String markdown =
841+
"---\n"
842+
+ "name: chinese-skill\n"
843+
+ "description: 测试skills, node: 无法找到EDI Partner、EDI"
844+
+ " Partner不存在、Partner配置错误、850订单没有生成SO、850订单报错、Can't find the EDI Customer"
845+
+ " setup in the EDI partner function、查不到850订单。处理EDI 850订单中无法找到EDI"
846+
+ " Partner的问题,当850报错包含Can't find the EDI Customer setup in the EDI partner"
847+
+ " function时使用此skill。\n"
848+
+ "---\n"
849+
+ "# Content";
850+
851+
ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown);
852+
853+
assertTrue(parsed.hasFrontmatter());
854+
assertEquals("chinese-skill", parsed.getMetadata().get("name"));
855+
String description = (String) parsed.getMetadata().get("description");
856+
assertNotNull(description);
857+
assertTrue(description.contains("无法找到EDI Partner"));
858+
assertTrue(description.contains("850订单"));
859+
assertTrue(description.contains("EDI Customer setup"));
860+
}
861+
}
733862
}

0 commit comments

Comments
 (0)