Skip to content

Commit bc3a698

Browse files
authored
[TFgen] Added a parser to load the OAS (#3851)
* [TFgen] Added a parser to load the OAS Added the parser's test data to `testdata/parser` * [TFgen] Added $ref cycle detection * [TFgen] Fail fast on circular $refs and wire --max-depth LoadSpec now resolves the component schema graph before enumerating: a circular $ref returns a typed *RefCycleError naming the offending ref, and expansion past --max-depth returns a depth error. The --max-depth flag is now wired through the generate command. * [TFgen] Added -race and -cover to the `make tfgen-test` command * [TFgen] (Nit) Addressed some minor concerns - Added better fixture coverage for allOf / anyOf - Added custom error for when max depth is reached in cycles.go
1 parent d1b2dae commit bc3a698

20 files changed

Lines changed: 1150 additions & 9 deletions

.generator-v2/internal/cli/generate.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package cli
22

33
import (
44
"github.com/spf13/cobra"
5+
6+
"github.com/terraform-providers/terraform-provider-datadog/generator/internal/parser"
57
)
68

79
func newGenerateCmd(flags *globalFlags) *cobra.Command {
@@ -12,15 +14,21 @@ func newGenerateCmd(flags *globalFlags) *cobra.Command {
1214
Use: "generate",
1315
Short: "Generate Terraform artifacts from the OpenAPI spec",
1416
RunE: func(cmd *cobra.Command, args []string) error {
15-
// TODO: implement
17+
spec, err := parser.LoadSpec(flags.spec, parser.WithMaxDepth(flags.maxDepth))
18+
if err != nil {
19+
return err
20+
}
21+
22+
// TODO: model -> emit -> writer -> report, honoring --check and --include.
23+
_ = spec
24+
_ = check
25+
_ = include
1626
return nil
1727
},
1828
}
1929

2030
cmd.Flags().BoolVar(&check, "check", false, "Read-only mode: exit 3 if any file would change")
2131
cmd.Flags().StringVar(&include, "include", "", "Comma-separated artifact names to generate (empty = all)")
22-
_ = check
23-
_ = include
2432

2533
return cmd
2634
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package cli
2+
3+
import (
4+
"errors"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/terraform-providers/terraform-provider-datadog/generator/internal/parser"
9+
)
10+
11+
// runTfgen builds the root command exactly as Execute does, but with explicit
12+
// args and errors returned rather than printed, so tests can assert on them.
13+
func runTfgen(args ...string) error {
14+
flags := &globalFlags{}
15+
root := newRootCmd("test", flags)
16+
root.AddCommand(newGenerateCmd(flags))
17+
root.AddCommand(newVerifyCmd(flags))
18+
root.SetArgs(args)
19+
root.SilenceErrors = true
20+
return root.Execute()
21+
}
22+
23+
// TestGenerateWiresMaxDepth proves the --max-depth flag value reaches LoadSpec:
24+
// the same deep-but-acyclic spec loads at a high bound and fails at a low one.
25+
// If the flag were ignored, both runs would behave identically.
26+
func TestGenerateWiresMaxDepth(t *testing.T) {
27+
deep := filepath.Join("..", "testdata", "parser", "deep_chain.yaml")
28+
29+
if err := runTfgen("generate", "--spec", deep, "--max-depth", "20"); err != nil {
30+
t.Errorf("--max-depth 20 should load the 8-deep chain, got: %v", err)
31+
}
32+
if err := runTfgen("generate", "--spec", deep, "--max-depth", "4"); err == nil {
33+
t.Error("--max-depth 4 should fail on the 8-deep chain, got nil")
34+
}
35+
}
36+
37+
// TestGenerateSurfacesCycleError proves cycle detection is reachable through the
38+
// command and surfaces the typed error.
39+
func TestGenerateSurfacesCycleError(t *testing.T) {
40+
self := filepath.Join("..", "testdata", "parser", "cycle_self.yaml")
41+
42+
err := runTfgen("generate", "--spec", self)
43+
var cycleErr *parser.RefCycleError
44+
if !errors.As(err, &cycleErr) {
45+
t.Fatalf("error %v (%T) is not a *parser.RefCycleError", err, err)
46+
}
47+
}

.generator-v2/internal/cli/root.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package cli
22

33
import (
44
"github.com/spf13/cobra"
5+
"github.com/terraform-providers/terraform-provider-datadog/generator/internal/parser"
56
)
67

78
type globalFlags struct {
@@ -26,7 +27,7 @@ func newRootCmd(version string, flags *globalFlags) *cobra.Command {
2627
cmd.PersistentFlags().StringVar(&flags.outputRoot, "output-root", "datadog/fwprovider", "Root directory for generated artifacts")
2728
cmd.PersistentFlags().StringVar(&flags.hooksRoot, "hooks-root", "datadog/fwprovider/hooks", "Root directory for hook subpackages")
2829
cmd.PersistentFlags().StringVar(&flags.trackingField, "tracking-field", "x-datadog-tf-generator", "OpenAPI extension name for the tracking field")
29-
cmd.PersistentFlags().IntVar(&flags.maxDepth, "max-depth", 8, "Hard limit on recursive $ref expansion")
30+
cmd.PersistentFlags().IntVar(&flags.maxDepth, "max-depth", parser.DefaultMaxDepth, "Hard limit on recursive $ref expansion")
3031
cmd.PersistentFlags().StringVar(&flags.report, "report", "-", "Where to write the run report (\"-\" = stdout)")
3132
cmd.PersistentFlags().BoolVar(&flags.quiet, "quiet", false, "Suppress informational logging")
3233

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
package parser
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"github.com/pb33f/libopenapi/datamodel/high/base"
8+
v3 "github.com/pb33f/libopenapi/datamodel/high/v3"
9+
)
10+
11+
// componentSchemaPrefix is prepended to a component schema's name to form the
// "#/components/schemas/<name>" ref used to seed cycle detection for it.
const componentSchemaPrefix = "#/components/schemas/"
12+
13+
// RefCycle describes a $ref that re-enters a schema already being expanded.
// Ref is the repeated reference. Path is the chain of $ref targets that closes
// the loop; its first and last elements are the repeated ref.
type RefCycle struct {
	Ref  string
	Path []string
}

// RefCycleError reports one or more $ref cycles found while loading a spec. It
// implements error so a loader can fail fast, and exposes the offending refs so
// callers can inspect them. The message names the OpenAPI path of the offending
// $ref (e.g. "#/components/schemas/Node").
type RefCycleError struct {
	Cycles []RefCycle
}

// Error renders the first recorded cycle in full and, when more than one cycle
// was recorded, appends a count of the remaining ones.
//
// A nil receiver or an empty Cycles slice yields a generic message instead of
// panicking: error values are routinely formatted by logging and test helpers
// that cannot be expected to validate the struct first.
func (e *RefCycleError) Error() string {
	if e == nil || len(e.Cycles) == 0 {
		return "parser: circular $ref"
	}

	first := e.Cycles[0]
	msg := fmt.Sprintf("parser: circular $ref at %s (cycle: %s)", first.Ref, strings.Join(first.Path, " -> "))
	if extra := len(e.Cycles) - 1; extra > 0 {
		msg += fmt.Sprintf(" (and %d more)", extra)
	}
	return msg
}
37+
38+
// MaxDepthError signals that following $refs reached the --max-depth bound
// before the current path ended. It is a distinct type from *RefCycleError so
// that errors.As can separate "too deep" from an actual cycle.
type MaxDepthError struct {
	// Ref is the $ref that would have pushed the path past the limit.
	Ref string
	// Chain is the path of refs leading up to and including Ref.
	Chain []string
	// MaxDepth is the bound that was hit.
	MaxDepth int
}

// Error reports the bound, the quoted offending ref, and the chain of refs
// joined with " -> ".
func (e *MaxDepthError) Error() string {
	chain := strings.Join(e.Chain, " -> ")
	const layout = "parser: $ref expansion exceeded --max-depth %d at %q (chain: %s)"
	return fmt.Sprintf(layout, e.MaxDepth, e.Ref, chain)
}
53+
54+
// DetectRefCycles walks the schema graph rooted at root. It follows $ref
55+
// references and every structural child (properties, items, prefixItems,
56+
// allOf/oneOf/anyOf, not, additionalProperties) and reports each distinct $ref
57+
// cycle. Callers mark cyclic schemas as terminal (model.SchemaKindRefCycle)
58+
// rather than expanding them forever.
59+
//
60+
// maxDepth bounds how many $ref edges may be followed on a single path: an
61+
// acyclic chain longer than maxDepth returns an error, guarding against
62+
// pathological or unbounded specs (the --max-depth flag). maxDepth <= 0 disables
63+
// that bound. Cycles are still found, since a re-entered $ref terminates the
64+
// walk regardless of depth.
65+
func DetectRefCycles(root *base.SchemaProxy, maxDepth int) ([]RefCycle, error) {
66+
w := newCycleWalker(maxDepth)
67+
if err := w.walkProxy("", root); err != nil {
68+
return nil, err
69+
}
70+
return w.cycles, nil
71+
}
72+
73+
// DetectComponentRefCycles runs cycle detection across every component schema,
74+
// seeding each with its own "#/components/schemas/<name>" ref so a component
75+
// that references itself (directly or transitively) is detected even though its
76+
// top-level node is a definition rather than a $ref. The seed does not count
77+
// toward maxDepth, only the $ref edges followed from it do. Detection state is
78+
// shared across components, so each schema's subtree is walked at most once.
79+
func DetectComponentRefCycles(components *v3.Components, maxDepth int) ([]RefCycle, error) {
80+
if components == nil || components.Schemas == nil {
81+
return nil, nil
82+
}
83+
w := newCycleWalker(maxDepth)
84+
for name, proxy := range components.Schemas.FromOldest() {
85+
if err := w.walkProxy(componentSchemaPrefix+name, proxy); err != nil {
86+
return nil, err
87+
}
88+
}
89+
return w.cycles, nil
90+
}
91+
92+
// cycleWalker is a three-color DFS over the schema graph: refs on stack are
93+
// "gray" (re-entry is a cycle), refs in done are "black" (subtree fully
94+
// explored, safe to prune). depth counts only the $ref edges currently on the
95+
// path, so the maxDepth bound is independent of whether the walk was seeded
96+
// with a component's own ref.
97+
type cycleWalker struct {
98+
maxDepth int
99+
depth int // count of $ref edges on the current path
100+
stack []string // refs on the current path (for cycle detection)
101+
onStack map[string]bool // membership test for stack
102+
done map[string]bool // refs whose subtree is fully explored
103+
reported map[string]bool // closing refs already recorded, to dedupe
104+
cycles []RefCycle
105+
}
106+
107+
func newCycleWalker(maxDepth int) *cycleWalker {
108+
return &cycleWalker{
109+
maxDepth: maxDepth,
110+
onStack: map[string]bool{},
111+
done: map[string]bool{},
112+
reported: map[string]bool{},
113+
}
114+
}
115+
116+
// walkProxy descends into the schemaProxy for cycle detection. The ref argument distinguishes the
117+
// two ways a node is reached:
118+
//
119+
// - ref != "": schemaProxy is a named component definition and ref is its canonical
120+
// "#/components/schemas/<name>". It seeds the cycle stack (so the component
121+
// can be detected referencing itself) but is not a $ref edge, so it does not
122+
// consume depth budget.
123+
// - ref == "": schemaProxy is a child node. If it is a $ref, it is followed as a
124+
// depth-counted edge; otherwise it is an inline schema, which has no
125+
// identity and cannot start a cycle, so the walkProxy simply descends into it.
126+
func (w *cycleWalker) walkProxy(ref string, schemaProxy *base.SchemaProxy) error {
127+
if schemaProxy == nil {
128+
return nil
129+
}
130+
131+
edge := false
132+
if ref == "" {
133+
if !schemaProxy.IsReference() {
134+
return w.walkSchema(schemaProxy.Schema())
135+
}
136+
ref, edge = schemaProxy.GetReference(), true
137+
}
138+
139+
entered, err := w.enter(ref, edge)
140+
if err != nil || !entered {
141+
return err
142+
}
143+
walkErr := w.walkSchema(schemaProxy.Schema())
144+
w.leave(ref, walkErr == nil, edge)
145+
return walkErr
146+
}
147+
148+
// enter handles a node on the path. It reports a cycle (and skips) when ref is
149+
// already on the stack, skips refs whose subtree is already explored, and
150+
// errors if the depth bound would be exceeded. Cycle and done checks take
151+
// precedence over the depth bound so a cyclic spec is reported as a cycle,
152+
// never as a depth error. It returns whether the caller should walk ref's children.
153+
func (w *cycleWalker) enter(ref string, edge bool) (bool, error) {
154+
if w.onStack[ref] {
155+
w.recordCycle(ref)
156+
return false, nil
157+
}
158+
if w.done[ref] {
159+
return false, nil
160+
}
161+
if edge && w.maxDepth > 0 && w.depth >= w.maxDepth {
162+
return false, &MaxDepthError{
163+
Ref: ref,
164+
Chain: append(append([]string{}, w.stack...), ref),
165+
MaxDepth: w.maxDepth,
166+
}
167+
}
168+
w.stack = append(w.stack, ref)
169+
w.onStack[ref] = true
170+
if edge {
171+
w.depth++
172+
}
173+
return true, nil
174+
}
175+
176+
func (w *cycleWalker) leave(ref string, completed, edge bool) {
177+
if edge {
178+
w.depth--
179+
}
180+
w.onStack[ref] = false
181+
w.stack = w.stack[:len(w.stack)-1]
182+
if completed {
183+
w.done[ref] = true
184+
}
185+
}
186+
187+
func (w *cycleWalker) walkSchema(s *base.Schema) error {
188+
if s == nil {
189+
return nil
190+
}
191+
for _, group := range [][]*base.SchemaProxy{s.AllOf, s.OneOf, s.AnyOf, s.PrefixItems} {
192+
for _, p := range group {
193+
if err := w.walkProxy("", p); err != nil {
194+
return err
195+
}
196+
}
197+
}
198+
if err := w.walkProxy("", s.Not); err != nil {
199+
return err
200+
}
201+
if s.Properties != nil {
202+
for _, p := range s.Properties.FromOldest() {
203+
if err := w.walkProxy("", p); err != nil {
204+
return err
205+
}
206+
}
207+
}
208+
if s.Items != nil && s.Items.IsA() {
209+
if err := w.walkProxy("", s.Items.A); err != nil {
210+
return err
211+
}
212+
}
213+
if s.AdditionalProperties != nil && s.AdditionalProperties.IsA() {
214+
if err := w.walkProxy("", s.AdditionalProperties.A); err != nil {
215+
return err
216+
}
217+
}
218+
return nil
219+
}
220+
221+
func (w *cycleWalker) recordCycle(ref string) {
222+
if w.reported[ref] {
223+
return
224+
}
225+
w.reported[ref] = true
226+
227+
start := 0
228+
for i, r := range w.stack {
229+
if r == ref {
230+
start = i
231+
break
232+
}
233+
}
234+
path := append([]string{}, w.stack[start:]...)
235+
path = append(path, ref)
236+
w.cycles = append(w.cycles, RefCycle{Ref: ref, Path: path})
237+
}

0 commit comments

Comments
 (0)