Skip to content

Commit dbc534b

Browse files
authored
feat: Add JSON type schema (#1796)
#### Summary Part of cloudquery/cloudquery#2023 ~~Still WIP and depends on cloudquery/cloudquery-api-go#196~~ ---
1 parent 7955c53 commit dbc534b

5 files changed

Lines changed: 275 additions & 10 deletions

File tree

schema/arrow.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ const (
1010
MetadataPrimaryKeyComponent = "cq:extension:primary_key_component"
1111
MetadataConstraintName = "cq:extension:constraint_name"
1212
MetadataIncremental = "cq:extension:incremental"
13+
MetadataTypeSchema = "cq:extension:type_schema"
1314

1415
MetadataTrue = "true"
1516
MetadataFalse = "false"

schema/column.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ type Column struct {
4646

4747
// PrimaryKeyComponent is a flag that indicates if the column is used as part of the input to calculate the value of `_cq_id`.
4848
PrimaryKeyComponent bool `json:"primary_key_component"`
49+
50+
// If the column type is JSON, this field will have a JSON string that represents the schema of the JSON object.
51+
TypeSchema string `json:"type_schema,omitempty"`
4952
}
5053

5154
// NewColumnFromArrowField creates a new Column from an arrow.Field
@@ -70,6 +73,9 @@ func NewColumnFromArrowField(f arrow.Field) Column {
7073
v, ok = f.Metadata.GetValue(MetadataPrimaryKeyComponent)
7174
column.PrimaryKeyComponent = ok && v == MetadataTrue
7275

76+
v, _ = f.Metadata.GetValue(MetadataTypeSchema)
77+
column.TypeSchema = v
78+
7379
return column
7480
}
7581

@@ -79,6 +85,7 @@ func (c Column) ToArrowField() arrow.Field {
7985
MetadataUnique: MetadataFalse,
8086
MetadataIncremental: MetadataFalse,
8187
MetadataPrimaryKeyComponent: MetadataFalse,
88+
MetadataTypeSchema: c.TypeSchema,
8289
}
8390
if c.PrimaryKey {
8491
mdKV[MetadataPrimaryKey] = MetadataTrue
@@ -111,6 +118,7 @@ func (c Column) MarshalJSON() ([]byte, error) {
111118
Unique bool `json:"unique"`
112119
IncrementalKey bool `json:"incremental_key"`
113120
PrimaryKeyComponent bool `json:"primary_key_component"`
121+
TypeSchema string `json:"type_schema,omitempty"`
114122
}
115123
var alias Alias
116124
alias.Name = c.Name
@@ -121,6 +129,7 @@ func (c Column) MarshalJSON() ([]byte, error) {
121129
alias.Unique = c.Unique
122130
alias.IncrementalKey = c.IncrementalKey
123131
alias.PrimaryKeyComponent = c.PrimaryKeyComponent
132+
alias.TypeSchema = c.TypeSchema
124133

125134
return json.Marshal(alias)
126135
}

serve/package.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func (s *PluginServe) writeTablesJSON(ctx context.Context, dir string) error {
7575
}
7676
columns := make([]cloudquery_api.PluginTableColumn, 0, len(table.Columns))
7777
for _, column := range table.Columns {
78-
columns = append(columns, cloudquery_api.PluginTableColumn{
78+
c := cloudquery_api.PluginTableColumn{
7979
Name: column.Name,
8080
Description: column.Description,
8181
Type: column.Type.String(),
@@ -86,7 +86,12 @@ func (s *PluginServe) writeTablesJSON(ctx context.Context, dir string) error {
8686
// 2. If the column is a `PrimaryKey` and both of the following are true column name is NOT `_cq_id` and there are other columns that are a PrimaryKeyComponent
8787
PrimaryKey: (column.PrimaryKey && !(column.Name == schema.CqIDColumn.Name && len(table.PrimaryKeyComponents()) > 0)) || column.PrimaryKeyComponent,
8888
Unique: column.Unique,
89-
})
89+
}
90+
if column.TypeSchema != "" {
91+
typeSchema := column.TypeSchema
92+
c.TypeSchema = &typeSchema
93+
}
94+
columns = append(columns, c)
9095
}
9196
tablesToEncode = append(tablesToEncode, cloudquery_api.PluginTableCreate{
9297
Description: &table.Description,

transformers/struct.go

Lines changed: 112 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
package transformers
22

33
import (
4+
"bytes"
5+
"encoding/json"
46
"fmt"
57
"reflect"
68
"slices"
9+
"strings"
710

11+
"github.com/apache/arrow/go/v17/arrow"
812
"github.com/cloudquery/plugin-sdk/v4/schema"
13+
"github.com/cloudquery/plugin-sdk/v4/types"
914
"github.com/thoas/go-funk"
1015
)
1116

17+
// maxJSONTypeSchemaDepth is the nesting depth below which fieldToJSONSchema
// keeps expanding JSON-typed struct fields into nested schemas.
const maxJSONTypeSchemaDepth = 5
18+
1219
type structTransformer struct {
1320
table *schema.Table
1421
skipFields []string
@@ -111,17 +118,11 @@ func (t *structTransformer) addColumnFromField(field reflect.StructField, parent
111118
return nil
112119
}
113120

114-
columnType, err := t.typeTransformer(field)
121+
columnType, err := t.getColumnType(field)
115122
if err != nil {
116-
return fmt.Errorf("failed to transform type for field %s: %w", field.Name, err)
123+
return err
117124
}
118125

119-
if columnType == nil {
120-
columnType, err = DefaultTypeTransformer(field)
121-
if err != nil {
122-
return fmt.Errorf("failed to transform type for field %s: %w", field.Name, err)
123-
}
124-
}
125126
if columnType == nil {
126127
return nil // ignored
127128
}
@@ -159,6 +160,11 @@ func (t *structTransformer) addColumnFromField(field reflect.StructField, parent
159160
IgnoreInTests: t.ignoreInTestsTransformer(field),
160161
}
161162

163+
// Enrich JSON column with detailed schema
164+
if columnType == types.ExtensionTypes.JSON {
165+
column.TypeSchema = structSchemaToJSON(t.fieldToJSONSchema(field, 0))
166+
}
167+
162168
for _, pk := range t.pkFields {
163169
if pk == path {
164170
// use path to allow the following
@@ -233,3 +239,101 @@ func TransformWithStruct(st any, opts ...StructTransformerOption) schema.Transfo
233239
return nil
234240
}
235241
}
242+
243+
func (t *structTransformer) getColumnType(field reflect.StructField) (arrow.DataType, error) {
244+
columnType, err := t.typeTransformer(field)
245+
if err != nil {
246+
return nil, fmt.Errorf("failed to transform type for field %s: %w", field.Name, err)
247+
}
248+
249+
if columnType == nil {
250+
columnType, err = DefaultTypeTransformer(field)
251+
if err != nil {
252+
return nil, fmt.Errorf("failed to transform type for field %s: %w", field.Name, err)
253+
}
254+
}
255+
return columnType, nil
256+
}
257+
258+
// structSchemaToJSON serializes s as compact JSON with HTML escaping disabled
// (so characters such as '<' and '>' are emitted verbatim) and returns the
// text with surrounding whitespace, including the encoder's trailing newline,
// removed.
func structSchemaToJSON(s any) string {
	var out bytes.Buffer
	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false)
	// The encoding error is deliberately discarded: the function has no error
	// return, and whatever reached the buffer is returned as-is.
	_ = enc.Encode(s)
	return strings.TrimSpace(out.String())
}
265+
266+
// normalizePointer returns a reflect.Value holding a pointer to a new zero
// value of field's type. When the field type is itself a pointer, one pointer
// level is removed first, so fields declared as T and *T both yield a *T.
func normalizePointer(field reflect.StructField) reflect.Value {
	target := field.Type
	if target.Kind() == reflect.Ptr {
		target = target.Elem()
	}
	return reflect.New(target)
}
272+
273+
func (t *structTransformer) fieldToJSONSchema(field reflect.StructField, depth int) any {
274+
transformInput := normalizePointer(field)
275+
switch transformInput.Elem().Kind() {
276+
case reflect.Struct:
277+
fieldsMap := make(map[string]any)
278+
fieldType := transformInput.Elem().Type()
279+
for i := 0; i < fieldType.NumField(); i++ {
280+
name, err := t.nameTransformer(fieldType.Field(i))
281+
if err != nil {
282+
continue
283+
}
284+
columnType, err := t.getColumnType(fieldType.Field(i))
285+
if err != nil {
286+
continue
287+
}
288+
if columnType == nil {
289+
fieldsMap[name] = "any"
290+
continue
291+
}
292+
// Avoid infinite recursion
293+
if columnType == types.ExtensionTypes.JSON && depth < maxJSONTypeSchemaDepth {
294+
fieldsMap[name] = t.fieldToJSONSchema(fieldType.Field(i), depth+1)
295+
continue
296+
}
297+
asList, ok := columnType.(*arrow.ListType)
298+
if ok {
299+
fieldsMap[name] = []any{asList.Elem().String()}
300+
continue
301+
}
302+
fieldsMap[name] = columnType.String()
303+
}
304+
return fieldsMap
305+
case reflect.Map:
306+
keySchema, ok := t.fieldToJSONSchema(reflect.StructField{
307+
Type: field.Type.Key(),
308+
}, depth+1).(string)
309+
if keySchema == "" || !ok {
310+
return ""
311+
}
312+
valueSchema := t.fieldToJSONSchema(reflect.StructField{
313+
Type: field.Type.Elem(),
314+
}, depth+1)
315+
if valueSchema == "" {
316+
return ""
317+
}
318+
return map[string]any{
319+
keySchema: valueSchema,
320+
}
321+
case reflect.Slice:
322+
valueSchema := t.fieldToJSONSchema(reflect.StructField{
323+
Type: field.Type.Elem(),
324+
}, depth+1)
325+
if valueSchema == "" {
326+
return ""
327+
}
328+
return []any{valueSchema}
329+
}
330+
331+
columnType, err := t.getColumnType(field)
332+
if err != nil {
333+
return ""
334+
}
335+
if columnType == nil {
336+
return "any"
337+
}
338+
return columnType.String()
339+
}

transformers/struct_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,149 @@ func TestTableFromGoStruct(t *testing.T) {
480480
})
481481
}
482482
}
483+
484+
func TestJSONTypeSchema(t *testing.T) {
485+
tests := []struct {
486+
name string
487+
testStruct any
488+
want map[string]string
489+
}{
490+
{
491+
name: "simple map",
492+
testStruct: struct {
493+
Tags map[string]string `json:"tags"`
494+
}{},
495+
want: map[string]string{
496+
"tags": `{"utf8":"utf8"}`,
497+
},
498+
},
499+
{
500+
name: "simple array",
501+
testStruct: struct {
502+
Items []struct {
503+
Name string `json:"name"`
504+
} `json:"items"`
505+
}{},
506+
want: map[string]string{
507+
"items": `[{"name":"utf8"}]`,
508+
},
509+
},
510+
{
511+
name: "simple struct",
512+
testStruct: struct {
513+
Item struct {
514+
Name string `json:"name"`
515+
} `json:"item"`
516+
}{},
517+
want: map[string]string{
518+
"item": `{"name":"utf8"}`,
519+
},
520+
},
521+
{
522+
name: "complex struct",
523+
testStruct: struct {
524+
Item struct {
525+
Name string `json:"name"`
526+
Tags map[string]string `json:"tags"`
527+
FlatItems []string `json:"flat_items"`
528+
ComplexItems []struct {
529+
Name string `json:"name"`
530+
} `json:"complex_items"`
531+
} `json:"item"`
532+
}{},
533+
want: map[string]string{
534+
"item": `{"complex_items":[{"name":"utf8"}],"flat_items":["utf8"],"name":"utf8","tags":{"utf8":"utf8"}}`,
535+
},
536+
},
537+
{
538+
name: "multiple json columns",
539+
testStruct: struct {
540+
Tags map[string]string `json:"tags"`
541+
Item struct {
542+
Name string `json:"name"`
543+
Tags map[string]string `json:"tags"`
544+
FlatItems []string `json:"flat_items"`
545+
ComplexItems []struct {
546+
Name string `json:"name"`
547+
} `json:"complex_items"`
548+
} `json:"item"`
549+
}{},
550+
want: map[string]string{
551+
"item": `{"complex_items":[{"name":"utf8"}],"flat_items":["utf8"],"name":"utf8","tags":{"utf8":"utf8"}}`,
552+
},
553+
},
554+
{
555+
name: "handles any type in struct",
556+
testStruct: struct {
557+
Item struct {
558+
Name string `json:"name"`
559+
Object any `json:"object"`
560+
} `json:"item"`
561+
}{},
562+
want: map[string]string{
563+
"item": `{"name":"utf8","object":"any"}`,
564+
},
565+
},
566+
{
567+
name: "handles map from string to any",
568+
testStruct: struct {
569+
Tags map[string]any `json:"tags"`
570+
}{},
571+
want: map[string]string{
572+
"tags": `{"utf8":"any"}`,
573+
},
574+
},
575+
{
576+
name: "handles array of any",
577+
testStruct: struct {
578+
Items []any `json:"items"`
579+
}{},
580+
want: map[string]string{
581+
"items": `["any"]`,
582+
},
583+
},
584+
{
585+
name: "stops at the default depth of 5",
586+
testStruct: struct {
587+
Level0 struct {
588+
Level1 struct {
589+
Level2 struct {
590+
Level3 struct {
591+
Level4 struct {
592+
Level5 struct {
593+
Level6 struct {
594+
Name string `json:"name"`
595+
} `json:"level6"`
596+
} `json:"level5"`
597+
} `json:"level4"`
598+
} `json:"level3"`
599+
} `json:"level2"`
600+
} `json:"level1"`
601+
} `json:"level0"`
602+
}{},
603+
want: map[string]string{
604+
"level0": `{"level1":{"level2":{"level3":{"level4":{"level5":{"level6":"json"}}}}}}`,
605+
},
606+
},
607+
}
608+
609+
for _, tt := range tests {
610+
tt := tt
611+
t.Run(tt.name, func(t *testing.T) {
612+
table := schema.Table{
613+
Name: "test",
614+
}
615+
transformer := TransformWithStruct(tt.testStruct)
616+
err := transformer(&table)
617+
if err != nil {
618+
t.Fatal(err)
619+
}
620+
for col, schema := range tt.want {
621+
column := table.Column(col)
622+
if diff := cmp.Diff(column.TypeSchema, schema); diff != "" {
623+
t.Fatalf("table does not match expected. diff (-got, +want): %v", diff)
624+
}
625+
}
626+
})
627+
}
628+
}

0 commit comments

Comments
 (0)