Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .agent/rules/auto-generated-files.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ globs:
- "internal/genkit/tagging.py"
- "internal/mocks/**/*.go"
- "bundle/direct/dresources/*.generated.yml"
- "bundle/internal/schema/annotations_openapi.yml"
- "bundle/internal/validation/generated/*.go"
- "bundle/schema/jsonschema.json"
- "bundle/schema/jsonschema_for_docs.json"
Expand All @@ -35,7 +34,6 @@ paths:
- "internal/genkit/tagging.py"
- "internal/mocks/**/*.go"
- "bundle/direct/dresources/*.generated.yml"
- "bundle/internal/schema/annotations_openapi.yml"
- "bundle/internal/validation/generated/*.go"
- "bundle/schema/jsonschema.json"
- "bundle/schema/jsonschema_for_docs.json"
Expand Down Expand Up @@ -72,7 +70,7 @@ Files matching this rule's glob pattern are most likely generated artifacts. Aut
- Bundle schemas:
- `./task generate-schema`
- `./task generate-schema-docs`
- This can also refresh `bundle/internal/schema/annotations_openapi.yml` when OpenAPI annotation extraction is enabled.
- Both rewrite `bundle/internal/schema/annotations.yml` in place: upstream docs are sourced from `.codegen/cli.json` at generation time, and the file is synced with the config structure (placeholders added, stale entries dropped).
- Validation generated code:
- `./task generate-validation`
- Mock files:
Expand Down
37 changes: 9 additions & 28 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -856,35 +856,17 @@ tasks:
cmds:
- go test ./acceptance -run TestAccept/bundle/refschema -update

# Regenerates the OpenAPI annotation files (annotations_openapi*.yml) from the
# checked-in .codegen/cli.json. This is the step that propagates a cli.json
# change into the schema/docs artifacts: generate-schema and generate-schema-docs
# both depend on it, so after editing cli.json, running `task generate`
# propagates the change into jsonschema.json, jsonschema_for_docs.json and pydabs.
generate-annotations:
desc: Regenerate annotation files from .codegen/cli.json
# Dependency of both generate-schema and generate-schema-docs; `run: once`
# plus the sources/generates fingerprint keep `task generate` from re-running it for each parent.
run: once
sources:
- .codegen/cli.json
- bundle/internal/schema/**/*.go
- bundle/internal/schema/annotations*.yml
- go.mod
- go.sum
generates:
- bundle/internal/schema/annotations_openapi.yml
- bundle/internal/schema/annotations.yml
- bundle/schema/jsonschema.json
cmds:
- "DATABRICKS_CLI_JSON=.codegen/cli.json go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json"

# Upstream field documentation comes from the checked-in .codegen/cli.json;
# bundle/internal/schema/annotations.yml carries the CLI-owned docs and
# overrides and is rewritten in place (synced with the config structure).
# After editing either input, running `task generate` propagates the change
# into jsonschema.json, jsonschema_for_docs.json and pydabs.
generate-schema:
desc: Generate bundle JSON schema
deps: ['generate-annotations']
sources: &SCHEMA_SOURCES
- "**/*.go"
- bundle/internal/schema/annotations*.yml
- .codegen/cli.json
- bundle/internal/schema/annotations.yml
- exclude: "**/*_test.go"
- go.mod
- go.sum
Expand All @@ -893,11 +875,10 @@ tasks:
- bundle/schema/jsonschema.json
- bundle/internal/schema/annotations.yml
cmds:
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json"
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json .codegen/cli.json"

generate-schema-docs:
desc: Generate bundle JSON schema for documentation
deps: ['generate-annotations']
sources: *SCHEMA_SOURCES
generates:
- bundle/schema/jsonschema_for_docs.json
Expand All @@ -908,7 +889,7 @@ tasks:
# silently dropped from the output. Restore the fetch that lived in the
# old tools/post-generate.sh.
- git fetch origin 'refs/tags/v*:refs/tags/v*'
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json --docs"
- "go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema_for_docs.json .codegen/cli.json --docs"

generate-validation:
desc: Generate enum and required field validation code
Expand Down
62 changes: 25 additions & 37 deletions bundle/internal/annotation/file.go
Original file line number Diff line number Diff line change
@@ -1,44 +1,32 @@
package annotation

import (
"bytes"
"os"

"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/convert"
"github.com/databricks/cli/libs/dyn/merge"
"github.com/databricks/cli/libs/dyn/yamlloader"
)
// TypeAnnotation holds the documentation for one Go type. Self documents the
// type itself — it is applied to the type's JSON-schema $defs entry and is
// where enum values live — and Fields documents each of the type's fields by
// JSON name.
type TypeAnnotation struct {
// Self is the descriptor of the type itself; it is serialized under the
// "type" key and omitted when empty.
Self Descriptor `json:"type,omitempty"`
// Fields maps a field's JSON name to its descriptor; it is serialized
// under the "fields" key and omitted when empty.
Fields map[string]Descriptor `json:"fields,omitempty"`
}

// Parsed file with annotations, expected format:
// github.com/databricks/cli/bundle/config.Bundle:
//
// cluster_id:
// description: "Description"
type File map[string]map[string]Descriptor
// File is the in-memory annotations, keyed by Go type path, e.g.
// "github.com/databricks/cli/bundle/config.Bundle".
type File map[string]TypeAnnotation

func LoadAndMerge(sources []string) (File, error) {
prev := dyn.NilValue
for _, path := range sources {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
}
generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b))
if err != nil {
return nil, err
}
prev, err = merge.Merge(prev, generated)
if err != nil {
return nil, err
}
// SetField records d as the documentation of the field called name on the
// type identified by typeKey. The type's entry and its Fields map are created
// on first use, and the updated entry is always written back into f.
func (f File) SetField(typeKey, name string, d Descriptor) {
	entry := f[typeKey]
	if entry.Fields != nil {
		entry.Fields[name] = d
	} else {
		// First field recorded for this type: start the map with it.
		entry.Fields = map[string]Descriptor{name: d}
	}
	// Map values are copies, so the modified entry has to be stored again.
	f[typeKey] = entry
}

var data File

err := convert.ToTyped(&data, prev)
if err != nil {
return nil, err
}
return data, nil
// SetSelf records d as the documentation of the type identified by typeKey
// itself, leaving any field documentation already stored for it untouched.
func (f File) SetSelf(typeKey string, d Descriptor) {
	// Map values are copies: read the entry, update it, store it back.
	entry := f[typeKey]
	entry.Self = d
	f[typeKey] = entry
}
1 change: 0 additions & 1 deletion bundle/internal/schema/.gitattributes

This file was deleted.

152 changes: 47 additions & 105 deletions bundle/internal/schema/annotations.go
Original file line number Diff line number Diff line change
@@ -1,44 +1,64 @@
package main

import (
"bytes"
"fmt"
"maps"
"os"
"reflect"
"regexp"
"slices"
"strings"

yaml3 "go.yaml.in/yaml/v3"

"github.com/databricks/cli/bundle/internal/annotation"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/convert"
"github.com/databricks/cli/libs/dyn/merge"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/dyn/yamlsaver"
"github.com/databricks/cli/libs/jsonschema"
)

// annotationHandler carries the annotation sets used while decorating the
// JSON schema (addAnnotations) and when rewriting the annotations file
// (syncWithMissingAnnotations).
type annotationHandler struct {
// Annotations read from all annotation files including all overrides
// Annotations from cli.json merged with the annotations file.
parsedAnnotations annotation.File
// Annotations from the annotations file only: the content the CLI owns
// and rewrites during sync.
fileAnnotations annotation.File
// Placeholder annotations for config fields that have no description yet
// and therefore must be added to the annotation file.
missingAnnotations annotation.File
}

// Adds annotations to the JSON schema reading from the annotation files.
// More details https://json-schema.org/understanding-json-schema/reference/annotations
func newAnnotationHandler(sources []string) (*annotationHandler, error) {
data, err := annotation.LoadAndMerge(sources)
func newAnnotationHandler(extracted, fromFile annotation.File) (*annotationHandler, error) {
merged, err := mergeAnnotationFiles(extracted, fromFile)
if err != nil {
return nil, err
}
return &annotationHandler{
parsedAnnotations: merged,
fileAnnotations: fromFile,
missingAnnotations: annotation.File{},
}, nil
}

// mergeAnnotationFiles merges later layers over earlier ones with the same
// semantics the on-disk annotation files used to be merged with: maps merge
// recursively, scalars take the later value, sequences concatenate.
func mergeAnnotationFiles(files ...annotation.File) (annotation.File, error) {
prev := dyn.NilValue
for _, f := range files {
v, err := convert.FromTyped(f, dyn.NilValue)
if err != nil {
return nil, err
}
prev, err = merge.Merge(prev, v)
if err != nil {
return nil, err
}
}

var data annotation.File
err := convert.ToTyped(&data, prev)
if err != nil {
return nil, err
}
d := &annotationHandler{}
d.parsedAnnotations = data
d.missingAnnotations = annotation.File{}
return d, nil
return data, nil
}

func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
Expand All @@ -48,70 +68,35 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema
return s
}

annotations := d.parsedAnnotations[refPath]
if annotations == nil {
annotations = map[string]annotation.Descriptor{}
}

rootTypeAnnotation, ok := annotations[RootTypeKey]
if ok {
assignAnnotation(&s, rootTypeAnnotation)
}
ta := d.parsedAnnotations[refPath]
assignAnnotation(&s, ta.Self)

for k, v := range s.Properties {
item := annotations[k]
item := ta.Fields[k]
if item.Description == "" {
item.Description = annotation.Placeholder

emptyAnnotations := d.missingAnnotations[refPath]
if emptyAnnotations == nil {
emptyAnnotations = map[string]annotation.Descriptor{}
d.missingAnnotations[refPath] = emptyAnnotations
}
emptyAnnotations[k] = item
d.missingAnnotations.SetField(refPath, k, annotation.Descriptor{Description: annotation.Placeholder})
}
assignAnnotation(v, item)
}
return s
}

// Writes missing annotations with placeholder values back to the annotation file
func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error {
existingFile, err := os.ReadFile(outputPath)
if err != nil {
return err
}
existing, err := yamlloader.LoadYAML("", bytes.NewBuffer(existingFile))
if err != nil {
return err
}

for k := range d.missingAnnotations {
if !isCliPath(k) {
delete(d.missingAnnotations, k)
fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to refresh .codegen/cli.json and regenerate annotations\n", k)
}
}

missingAnnotations, err := convert.FromTyped(d.missingAnnotations, dyn.NilValue)
if err != nil {
return err
}

output, err := merge.Merge(existing, missingAnnotations)
// Writes the annotations file back in canonical form, adding placeholder
// descriptions for fields that have no documentation anywhere. Entries for
// fields that no longer exist in the config are dropped with a warning.
func (d *annotationHandler) syncWithMissingAnnotations(outputPath string, g *typeGraph) error {
updated, err := mergeAnnotationFiles(d.fileAnnotations, d.missingAnnotations)
if err != nil {
return err
}

var outputTyped annotation.File
err = convert.ToTyped(&outputTyped, output)
detached, err := saveAnnotationsFile(outputPath, updated, g)
if err != nil {
return err
}

err = saveYamlWithStyle(outputPath, outputTyped)
if err != nil {
return err
for _, k := range detached {
fmt.Printf("Dropping annotation for `%s`: no such field in the bundle configuration\n", k)
}
return nil
}
Expand Down Expand Up @@ -148,45 +133,6 @@ func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) {
s.Enum = a.Enum
}

// saveYamlWithStyle writes annotations to outputPath as YAML.
//
// Every top-level key (a type path) is tagged with yaml3.LiteralStyle and its
// per-field descriptors are converted to dyn values before the whole map is
// handed to a yamlsaver saver. It returns the first error reported by
// yamlsaver.ConvertToMapValue or by the saver; errors from convert.FromTyped
// are not returned (see below).
func saveYamlWithStyle(outputPath string, annotations annotation.File) error {
// Key order requested for the attributes of a single descriptor.
annotationOrder := yamlsaver.NewOrder([]string{"description", "markdown_description", "title", "default", "enum"})
style := map[string]yaml3.Style{}

// Order built from the alphabetically sorted top-level keys.
order := getAlphabeticalOrder(annotations)
dynMap := map[string]dyn.Value{}
for k, v := range annotations {
style[k] = yaml3.LiteralStyle

properties := map[string]dyn.Value{}
// Order built from the alphabetically sorted keys of this type's entry.
propertiesOrder := getAlphabeticalOrder(v)
for key, value := range v {
d, err := convert.FromTyped(value, dyn.NilValue)
// A descriptor that converts to nil — or fails to convert — is stored
// as an empty map; the conversion error is intentionally not propagated.
if d.Kind() == dyn.KindNil || err != nil {
properties[key] = dyn.NewValue(map[string]dyn.Value{}, []dyn.Location{{Line: propertiesOrder.Get(key)}})
continue
}
val, err := yamlsaver.ConvertToMapValue(value, annotationOrder, []string{}, map[string]dyn.Value{})
if err != nil {
return err
}
// NOTE(review): the Line of each location is set to the key's position
// in the sorted order — presumably this is how the saver decides the
// output order of keys; confirm in yamlsaver.
properties[key] = val.WithLocations([]dyn.Location{{Line: propertiesOrder.Get(key)}})
}

dynMap[k] = dyn.NewValue(properties, []dyn.Location{{Line: order.Get(k)}})
}

saver := yamlsaver.NewSaverWithStyle(style)
// NOTE(review): the meaning of the final `true` argument is defined by
// yamlsaver.SaveAsYAML and is not visible here — confirm before changing.
err := saver.SaveAsYAML(dynMap, outputPath, true)
if err != nil {
return err
}
return nil
}

// getAlphabeticalOrder builds a yamlsaver.Order from the keys of mapping,
// sorted in ascending order. The map's values are ignored.
func getAlphabeticalOrder[T any](mapping map[string]T) *yamlsaver.Order {
	// Map iteration order is random, so collect the keys and sort them.
	keys := slices.Collect(maps.Keys(mapping))
	slices.Sort(keys)
	return yamlsaver.NewOrder(keys)
}

func convertLinksToAbsoluteUrl(s string) string {
if s == "" {
return s
Expand Down Expand Up @@ -232,7 +178,3 @@ func convertLinksToAbsoluteUrl(s string) string {

return result
}

// isCliPath reports whether path lies outside the Databricks Go SDK module,
// i.e. whether it does not start with the SDK's import-path prefix.
func isCliPath(path string) bool {
	const sdkModulePrefix = "github.com/databricks/databricks-sdk-go"
	if strings.HasPrefix(path, sdkModulePrefix) {
		return false
	}
	return true
}
Loading
Loading