Skip to content

Commit 0480cf7

Browse files
authored
refactor(sidekick): common ExtractCrossReferenceLinks (#5863)
Swift is going to need the cross-reference links found in the service specification comments (see aip.dev/192), and will then use these cross-reference links to create proper Swifty links to the right things. This refactors some of the code into a common place.
1 parent fe43c66 commit 0480cf7

3 files changed

Lines changed: 294 additions & 75 deletions

File tree

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Copyright 2026 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package language
16+
17+
import (
18+
"bytes"
19+
"regexp"
20+
"sort"
21+
22+
"github.com/yuin/goldmark/ast"
23+
)
24+
25+
// ExtractCrossReferenceLinks returns the cross-reference links found in `doc` and `source`.
26+
//
27+
// Google Cloud documentation comments include cross-reference links:
28+
//
29+
// https://google.aip.dev/192#cross-references
30+
//
31+
// That is, markdown reference-style links in the form `[Title][Definition]` where `Title` is the text that should
32+
// appear in the documentation and `Definition` is the name of a Protobuf entity, e.g., `google.longrunning.Operation`.
33+
//
34+
// The cross reference links can be of the form `[Title][]` when both the title and definitions match. And may also
35+
// be relative to the current entity (e.g. `Operation` instead of the fully qualified `google.longrunning.Operation`).
36+
func ExtractCrossReferenceLinks(doc ast.Node, source []byte) []string {
37+
protobufLinks := map[string]bool{}
38+
ast.Walk(doc, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
39+
if !entering {
40+
return ast.WalkContinue, nil
41+
}
42+
switch node.Kind() {
43+
case ast.KindParagraph:
44+
text := node.Lines().Value(source)
45+
extractProtoLinks(text, protobufLinks)
46+
return ast.WalkContinue, nil
47+
case ast.KindTextBlock:
48+
text := node.Lines().Value(source)
49+
extractProtoLinks(text, protobufLinks)
50+
return ast.WalkContinue, nil
51+
default:
52+
return ast.WalkContinue, nil
53+
}
54+
})
55+
var sortedLinks []string
56+
for link := range protobufLinks {
57+
sortedLinks = append(sortedLinks, link)
58+
}
59+
sort.Strings(sortedLinks)
60+
return sortedLinks
61+
}
62+
63+
var (
	// explicitLink matches the `][Definition]` tail of a `[Title][Definition]`
	// cross-reference link. Capture group 1 is the definition: one or more
	// dot-separated identifiers, each starting with an ASCII letter and
	// continuing with ASCII letters, digits or underscores.
	//
	// The definition is matched precisely rather than as "anything up to the
	// next `]`". A looser pattern lets the empty label of an implied link
	// (`[Title][]`) start a match that runs into the following link, and since
	// matches do not overlap that following link would be lost.
	explicitLink = regexp.MustCompile(`\]\[([A-Za-z][A-Za-z0-9_]*(?:\.[A-Za-z][A-Za-z0-9_]*)*)\]`)

	// impliedLink matches `[Definition][]`, a cross-reference link whose title
	// and definition are the same. Capture group 1 is the definition, using the
	// same dotted-identifier shape as explicitLink.
	impliedLink = regexp.MustCompile(`\[([A-Za-z][A-Za-z0-9_]*(?:\.[A-Za-z][A-Za-z0-9_]*)*)\]\[\]`)
)
67+
68+
func extractProtoLinks(p []byte, links map[string]bool) {
69+
for _, m := range explicitLink.FindAllSubmatch(p, -1) {
70+
if validProtoName(m[1]) {
71+
links[string(m[1])] = true
72+
}
73+
}
74+
for _, m := range impliedLink.FindAllSubmatch(p, -1) {
75+
if validProtoName(m[1]) {
76+
links[string(m[1])] = true
77+
}
78+
}
79+
}
80+
81+
func validProtoName(b []byte) bool {
82+
parts := bytes.Split(b, []byte("."))
83+
for _, p := range parts {
84+
if !isIdent(p) {
85+
return false
86+
}
87+
}
88+
return len(parts) != 0
89+
}
90+
91+
// isProtoIdent returns true if the id looks like a valid identifier.
92+
func isIdent(id []byte) bool {
93+
if len(id) == 0 {
94+
return false
95+
}
96+
if !isIdStartingChar(id[0]) {
97+
return false
98+
}
99+
for _, b := range id[1:] {
100+
if !isIdChar(b) {
101+
return false
102+
}
103+
}
104+
return true
105+
}
106+
107+
// isIdStartingChar reports whether b may begin an identifier, that is,
// whether it is an ASCII letter.
func isIdStartingChar(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	default:
		return false
	}
}
110+
111+
func isIdChar(b byte) bool {
112+
return isIdStartingChar(b) || (b >= '0' && b <= '9') || b == '_'
113+
}
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
// Copyright 2026 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package language
16+
17+
import (
18+
"testing"
19+
20+
"github.com/google/go-cmp/cmp"
21+
"github.com/yuin/goldmark"
22+
"github.com/yuin/goldmark/parser"
23+
"github.com/yuin/goldmark/text"
24+
)
25+
26+
func TestExtractCrossReferenceLinks(t *testing.T) {
27+
for _, test := range []struct {
28+
name string
29+
input string
30+
want []string
31+
}{
32+
{
33+
name: "standard links",
34+
input: `
35+
[Any][google.protobuf.Any]
36+
[Message][test.v1.SomeMessage]
37+
`,
38+
want: []string{"google.protobuf.Any", "test.v1.SomeMessage"},
39+
},
40+
{
41+
name: "implied links",
42+
input: `
43+
implied service reference [SomeService][]
44+
implied method reference [SomeService.CreateFoo][]
45+
`,
46+
want: []string{"SomeService", "SomeService.CreateFoo"},
47+
},
48+
{
49+
name: "no links",
50+
input: `Just some text without links.`,
51+
want: nil,
52+
},
53+
{
54+
name: "multiple links on one line",
55+
input: `[Service][test.v1.SomeService] [field][test.v1.SomeMessage.field]`,
56+
want: []string{"test.v1.SomeMessage.field", "test.v1.SomeService"},
57+
},
58+
{
59+
name: "link definitions",
60+
input: `Link definitions should be added when collapsed links are used.
61+
For example, [google][].
62+
Second [example][].
63+
[Third] example.
64+
[google]: https://www.google.com
65+
[example]: https://www.example.com
66+
[Third]: https://www.third.com`,
67+
want: []string{"example", "google"},
68+
},
69+
{
70+
name: "explicit cross links",
71+
input: `
72+
[Any][google.protobuf.Any]
73+
[Message][test.v1.SomeMessage]
74+
[Enum][test.v1.SomeMessage.SomeEnum]
75+
[Message][test.v1.SomeMessage] repeated
76+
[Service][test.v1.SomeService] [field][test.v1.SomeMessage.field]
77+
[oneof group][test.v1.SomeMessage.result]
78+
[oneof field][test.v1.SomeMessage.error]
79+
[unmangled field][test.v1.SomeMessage.type] - normally r#type, but not in links
80+
[SomeMessage.error][test.v1.SomeMessage.error]
81+
[ExternalMessage][google.iam.v1.SetIamPolicyRequest]
82+
[ExternalService][google.iam.v1.IAMPolicy]
83+
[ENUM_VALUE][test.v1.SomeMessage.SomeEnum.ENUM_VALUE]
84+
[SomeService.CreateFoo][test.v1.SomeService.CreateFoo]
85+
[SomeService.CreateBar][test.v1.SomeService.CreateBar]
86+
[a method][test.v1.YELL.CreateThing]
87+
[the service name][test.v1.YELL]
88+
[renamed service][test.v1.RenamedService]
89+
[method of renamed service][test.v1.RenamedService.CreateFoo]
90+
`,
91+
want: []string{
92+
"google.iam.v1.IAMPolicy",
93+
"google.iam.v1.SetIamPolicyRequest",
94+
"google.protobuf.Any",
95+
"test.v1.RenamedService",
96+
"test.v1.RenamedService.CreateFoo",
97+
"test.v1.SomeMessage",
98+
"test.v1.SomeMessage.SomeEnum",
99+
"test.v1.SomeMessage.SomeEnum.ENUM_VALUE",
100+
"test.v1.SomeMessage.error",
101+
"test.v1.SomeMessage.field",
102+
"test.v1.SomeMessage.result",
103+
"test.v1.SomeMessage.type",
104+
"test.v1.SomeService",
105+
"test.v1.SomeService.CreateBar",
106+
"test.v1.SomeService.CreateFoo",
107+
"test.v1.YELL",
108+
"test.v1.YELL.CreateThing",
109+
},
110+
},
111+
{
112+
name: "relative cross links",
113+
input: `
114+
[relative link to service][SomeService]
115+
[relative link to method][SomeService.CreateFoo]
116+
[relative link to message][SomeMessage]
117+
[relative link to message field][SomeMessage.field]
118+
[relative link to message oneof group][SomeMessage.result]
119+
[relative link to message oneof field][SomeMessage.error]
120+
[relative link to unmangled field][SomeMessage.type]
121+
[relative link to enum][SomeMessage.SomeEnum]
122+
[relative link to enum value][SomeMessage.SomeEnum.ENUM_VALUE]
123+
`,
124+
want: []string{
125+
"SomeMessage",
126+
"SomeMessage.SomeEnum",
127+
"SomeMessage.SomeEnum.ENUM_VALUE",
128+
"SomeMessage.error",
129+
"SomeMessage.field",
130+
"SomeMessage.result",
131+
"SomeMessage.type",
132+
"SomeService",
133+
"SomeService.CreateFoo",
134+
},
135+
},
136+
{
137+
name: "implied cross links",
138+
input: `
139+
implied service reference [SomeService][]
140+
implied method reference [SomeService.CreateFoo][]
141+
implied message reference [SomeMessage][]
142+
implied message field reference [SomeMessage.field][]
143+
implied message oneof group reference [SomeMessage.result][]
144+
implied message oneof field reference [SomeMessage.error][]
145+
implied message unmangled field reference [SomeMessage.type][]
146+
implied enum reference [SomeMessage.SomeEnum][]
147+
implied enum value reference [SomeMessage.SomeEnum.ENUM_VALUE][]
148+
`,
149+
want: []string{
150+
"SomeMessage",
151+
"SomeMessage.SomeEnum",
152+
"SomeMessage.SomeEnum.ENUM_VALUE",
153+
"SomeMessage.error",
154+
"SomeMessage.field",
155+
"SomeMessage.result",
156+
"SomeMessage.type",
157+
"SomeService",
158+
"SomeService.CreateFoo",
159+
},
160+
},
161+
{
162+
name: "text block in list item",
163+
input: `- [ListMessage][test.v1.ListMessage]`,
164+
want: []string{"test.v1.ListMessage"},
165+
},
166+
} {
167+
t.Run(test.name, func(t *testing.T) {
168+
md := goldmark.New(
169+
goldmark.WithParserOptions(
170+
parser.WithAutoHeadingID(),
171+
),
172+
)
173+
doc := md.Parser().Parse(text.NewReader([]byte(test.input)))
174+
got := ExtractCrossReferenceLinks(doc, []byte(test.input))
175+
if diff := cmp.Diff(test.want, got); diff != "" {
176+
t.Errorf("mismatch (-want +got):\n%s", diff)
177+
}
178+
})
179+
}
180+
}

internal/sidekick/rust/codec.go

Lines changed: 1 addition & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package rust
1616

1717
import (
18-
"bytes"
1918
"fmt"
2019
"log/slog"
2120
"maps"
@@ -942,7 +941,7 @@ func (c *codec) formatDocComments(
942941
return ast.WalkContinue, nil
943942
})
944943

945-
for _, link := range protobufLinkMapping(doc, documentationBytes) {
944+
for _, link := range language.ExtractCrossReferenceLinks(doc, documentationBytes) {
946945
rusty, err := c.docLink(link, model, scopes)
947946
if err != nil {
948947
return nil, err
@@ -962,79 +961,6 @@ func (c *codec) formatDocComments(
962961
return results, nil
963962
}
964963

965-
// protobufLinkMapping returns additional comment lines to map protobuf links
966-
// to Rustdoc links.
967-
//
968-
// Protobuf comments include links in the form `[Title][Definition]` where
969-
// `Title` is the text that should appear in the documentation and `Definition`
970-
// is the name of a Protobuf entity, e.g., `google.longrunning.Operation`.
971-
//
972-
// We need to map these references from Protobuf names to the corresponding
973-
// entity in the generated code. We do this by appending a number of link
974-
// definitions to the comments, e.g.
975-
//
976-
// //// [google.longrunning.Operation]: google_cloud_longrunning::model::Operation
977-
func protobufLinkMapping(doc ast.Node, source []byte) []string {
978-
protobufLinks := map[string]bool{}
979-
ast.Walk(doc, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
980-
switch node.Kind() {
981-
case ast.KindParagraph:
982-
text := node.Lines().Value(source)
983-
extractProtoLinks(text, protobufLinks)
984-
return ast.WalkContinue, nil
985-
case ast.KindTextBlock:
986-
text := node.Lines().Value(source)
987-
extractProtoLinks(text, protobufLinks)
988-
return ast.WalkContinue, nil
989-
default:
990-
return ast.WalkContinue, nil
991-
}
992-
})
993-
var sortedLinks []string
994-
for link := range protobufLinks {
995-
sortedLinks = append(sortedLinks, link)
996-
}
997-
sort.Strings(sortedLinks)
998-
return sortedLinks
999-
}
1000-
1001-
// commentCrossReferenceLink is a regular expression to find cross links in
// comments.
//
// The Google API documentation (typically in protos) includes links to code
// elements in the form `[Thing][google.package.blah.v1.Thing.SubThing]`.
// This regular expression matches the `][...]` part. There is a lot of
// escaping because the brackets are metacharacters in regex. Reading the
// pattern left to right:
//
//   - `\]`: the closing bracket for the `[Thing]`
//   - `\[`: the opening bracket for the code element
//   - `[A-Za-z][A-Za-z0-9_]*`: a thing that looks like a Protobuf identifier
//   - `(\.[A-Za-z][A-Za-z0-9_]*)*`: zero or more further identifiers, each
//     preceded by a dot
//   - `\]`: the closing bracket
var commentCrossReferenceLink = regexp.MustCompile(`\]\[[A-Za-z][A-Za-z0-9_]*(\.[A-Za-z][A-Za-z0-9_]*)*\]`)
1017-
1018-
// commentImpliedCrossReferenceLink is a regular expression to find implied
// cross reference links, i.e. links of the form `[Definition][]` where the
// link text is itself the name of the referenced element.
//
// The first character of the name must be an ASCII letter; a hyphen is not a
// valid start of a Protobuf identifier.
var commentImpliedCrossReferenceLink = regexp.MustCompile(
	`` + // `go fmt` is annoying
		`\[` + // The opening bracket
		`[A-Za-z][A-Za-z0-9_]*` + // A thing that looks like a Protobuf identifier
		`(\.[A-Za-z][A-Za-z0-9_]*)*` + // Followed by more identifiers
		`\]\[\]`) // The closing bracket followed by an empty link label
1026-
1027-
func extractProtoLinks(paragraph []byte, links map[string]bool) {
1028-
for _, match := range commentCrossReferenceLink.FindAll(paragraph, -1) {
1029-
match = bytes.TrimSuffix(bytes.TrimPrefix(match, []byte("][")), []byte("]"))
1030-
links[string(match)] = true
1031-
}
1032-
for _, match := range commentImpliedCrossReferenceLink.FindAll(paragraph, -1) {
1033-
match = bytes.TrimSuffix(bytes.TrimPrefix(match, []byte("[")), []byte("][]"))
1034-
links[string(match)] = true
1035-
}
1036-
}
1037-
1038964
func processCommentLine(node ast.Node, line text.Segment, documentationBytes []byte) string {
1039965
lineString := escapeHTMLTags(node, line, documentationBytes)
1040966
lineString = escapeUrls(lineString)

0 commit comments

Comments
 (0)