fix(graph): rune-based truncation in dotEscape prevents invalid UTF-8 in DOT output (#105)

greynewell · claude · web-flow · commit 161c66e54889 · 2026-04-09T15:01:19.000-04:00
* fix(graph2md): writeGraphData uses zero lineCount when startLine absent

writeGraphData computed lineCount only when startLine > 0 && endLine > 0.
For nodes without a startLine (API returns 0), the condition was false and
lineCount remained 0, so the graph visualisation data showed lc=0 even
though the same node's frontmatter correctly computed line_count=endLine
(using effectiveStart=1).

Fix: mirror the effectiveStart=1 defaulting used by all frontmatter writers
— if endLine > 0 but startLine <= 0, treat startLine as 1.

Adds TestGraphDataLineCountMissingStartLine to catch the regression.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(lint): remove extra spaces in parseGraphData struct literal

goimports rejects manually aligned spaces; use single space.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(graph): rune-based truncation in dotEscape to avoid invalid UTF-8

dotEscape used byte indexing (s[len(s)-39:]) to take the last 39
characters of a node name. For names with multi-byte UTF-8 characters
(e.g. accented letters, CJK paths), the byte offset could land in the
middle of a multi-byte sequence, producing invalid UTF-8 in the DOT
output and breaking downstream Graphviz tools.

Fix: convert to []rune and slice by rune index.

Adds TestWriteDOT_LongNameTruncated_MultiByteUTF8 as a regression test
(41 × "é" = 82 bytes → the old byte slice s[43:] started at byte offset 43,
the second byte of a two-byte U+00E9 sequence, producing invalid UTF-8).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/internal/graph/handler.go b/internal/graph/handler.go
@@ -121,8 +121,9 @@ func writeDOT(w io.Writer, g *api.Graph, filter string) error {
 }
 
 func dotEscape(s string) string {
-	if len(s) > 40 {
-		s = "…" + s[len(s)-39:]
+	runes := []rune(s)
+	if len(runes) > 40 {
+		return "…" + string(runes[len(runes)-39:])
 	}
 	return s
 }
diff --git a/internal/graph/handler_test.go b/internal/graph/handler_test.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"strings"
 	"testing"
+	"unicode/utf8"
 
 	"github.com/supermodeltools/cli/internal/api"
 )
@@ -307,3 +308,30 @@ func TestPrintGraph_HumanDefault(t *testing.T) {
 		t.Errorf("human output should contain table headers:\n%s", buf.String())
 	}
 }
+
+// TestWriteDOT_LongNameTruncated_MultiByteUTF8 verifies that dotEscape does
+// not split a multi-byte UTF-8 character when truncating long node names.
+// Before the fix, s[len(s)-39:] used byte indexing, which could land in the
+// middle of a multi-byte character and produce invalid UTF-8 in the DOT file.
+func TestWriteDOT_LongNameTruncated_MultiByteUTF8(t *testing.T) {
+	// 41 × "é" (2 bytes each) = 82 bytes, 41 runes.
+	// byte-based slice: s[82-39:] = s[43:] — byte 43 is the second byte of "é"
+	// (U+00E9 encodes as 0xC3 0xA9), producing invalid UTF-8 without the fix.
+	longName := strings.Repeat("é", 41)
+	g := &api.Graph{
+		Nodes: []api.Node{
+			{ID: "n1", Labels: []string{"Function"}, Properties: map[string]any{"name": longName}},
+		},
+	}
+	var buf bytes.Buffer
+	if err := writeDOT(&buf, g, ""); err != nil {
+		t.Fatalf("writeDOT: %v", err)
+	}
+	out := buf.String()
+	if !utf8.ValidString(out) {
+		t.Errorf("writeDOT output contains invalid UTF-8 (byte-based truncation of multi-byte name)")
+	}
+	if strings.Contains(out, longName) {
+		t.Errorf("long multi-byte name should be truncated in DOT output")
+	}
+}

Original file line number	Diff line number	Diff line change
`@@ -121,8 +121,9 @@ func writeDOT(w io.Writer, g *api.Graph, filter string) error {`
`121`	`121`	`}`
`122`	`122`
`123`	`123`	`func dotEscape(s string) string {`
`124`		`- if len(s) > 40 {`
`125`		`- s = "…" + s[len(s)-39:]`
	`124`	`+ runes := []rune(s)`
	`125`	`+ if len(runes) > 40 {`
	`126`	`+ return "…" + string(runes[len(runes)-39:])`
`126`	`127`	`}`
`127`	`128`	`return s`
`128`	`129`	`}`