diff --git a/db/cel_functions.go b/db/cel_functions.go new file mode 100644 index 000000000..1b16c4270 --- /dev/null +++ b/db/cel_functions.go @@ -0,0 +1,162 @@ +package db + +import ( + "encoding/json" + + "github.com/flanksource/duty/context" + dutyModels "github.com/flanksource/duty/models" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/google/uuid" +) + +func init() { + context.CelEnvFuncs["db.external_users"] = externalUsersCEL(false) + context.CelEnvFuncs["db.external_users_all"] = externalUsersCEL(true) + context.CelEnvFuncs["db.external_groups"] = externalGroupsCEL(false) + context.CelEnvFuncs["db.external_groups_all"] = externalGroupsCEL(true) + context.CelEnvFuncs["db.external_roles"] = externalRolesCEL(false) + context.CelEnvFuncs["db.external_roles_all"] = externalRolesCEL(true) + context.CelEnvFuncs["db.config_access"] = configAccessCEL(false) + context.CelEnvFuncs["db.config_access_all"] = configAccessCEL(true) + context.CelEnvFuncs["db.config_access_logs"] = configAccessLogsCEL() +} + +func externalUsersCEL(includeDeleted bool) func(context.Context) cel.EnvOption { + suffix := "" + if includeDeleted { + suffix = "_all" + } + return func(ctx context.Context) cel.EnvOption { + return cel.Function("db.external_users"+suffix, + cel.Overload("db_external_users"+suffix+"_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.DynType), + cel.UnaryBinding(func(arg ref.Val) ref.Val { + scraperID, err := uuid.Parse(arg.Value().(string)) + if err != nil { + return types.WrapErr(err) + } + return queryEntities[dutyModels.ExternalUser](ctx, "external_users", scraperID, includeDeleted) + }), + ), + ) + } +} + +func externalGroupsCEL(includeDeleted bool) func(context.Context) cel.EnvOption { + suffix := "" + if includeDeleted { + suffix = "_all" + } + return func(ctx context.Context) cel.EnvOption { + return cel.Function("db.external_groups"+suffix, + cel.Overload("db_external_groups"+suffix+"_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.DynType), + cel.UnaryBinding(func(arg ref.Val) ref.Val { + scraperID, err := uuid.Parse(arg.Value().(string)) + if err != nil { + return types.WrapErr(err) + } + return queryEntities[dutyModels.ExternalGroup](ctx, "external_groups", scraperID, includeDeleted) + }), + ), + ) + } +} + +func externalRolesCEL(includeDeleted bool) func(context.Context) cel.EnvOption { + suffix := "" + if includeDeleted { + suffix = "_all" + } + return func(ctx context.Context) cel.EnvOption { + return cel.Function("db.external_roles"+suffix, + cel.Overload("db_external_roles"+suffix+"_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.DynType), + cel.UnaryBinding(func(arg ref.Val) ref.Val { + scraperID, err := uuid.Parse(arg.Value().(string)) + if err != nil { + return types.WrapErr(err) + } + return queryEntities[dutyModels.ExternalRole](ctx, "external_roles", scraperID, includeDeleted) + }), + ), + ) + } +} + +func queryEntities[T any](ctx context.Context, table string, scraperID uuid.UUID, includeDeleted bool) ref.Val { + var rows []T + q := ctx.DB().Table(table).Where("scraper_id = ?", scraperID) + if !includeDeleted { + q = q.Where("deleted_at IS NULL") + } + if err := q.Find(&rows).Error; err != nil { + return types.WrapErr(err) + } + raw, _ := json.Marshal(rows) + var result []any + _ = json.Unmarshal(raw, &result) + if result == nil { + result = []any{} + } + return types.DefaultTypeAdapter.NativeToValue(result) +} + +func configAccessCEL(includeDeleted bool) func(context.Context) cel.EnvOption { + suffix := "" + if includeDeleted { + suffix = "_all" + } + return func(ctx context.Context) cel.EnvOption { + return cel.Function("db.config_access"+suffix, + cel.Overload("db_config_access"+suffix+"_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.DynType), + cel.UnaryBinding(func(arg ref.Val) ref.Val { + scraperID, err := uuid.Parse(arg.Value().(string)) + if err != nil { + return types.WrapErr(err) + } + return queryEntities[dutyModels.ConfigAccess](ctx, "config_access", scraperID, includeDeleted) + }), + ), + ) + } +} + +func configAccessLogsCEL() func(context.Context) cel.EnvOption { + return func(ctx context.Context) cel.EnvOption { + return cel.Function("db.config_access_logs", + cel.Overload("db_config_access_logs_string", + []*cel.Type{cel.StringType}, + cel.ListType(cel.DynType), + cel.UnaryBinding(func(arg ref.Val) ref.Val { + scraperID, err := uuid.Parse(arg.Value().(string)) + if err != nil { + return types.WrapErr(err) + } + return queryConfigAccessLogs(ctx, scraperID) + }), + ), + ) + } +} + +func queryConfigAccessLogs(ctx context.Context, scraperID uuid.UUID) ref.Val { + var rows []dutyModels.ConfigAccessLog + if err := ctx.DB().Table("config_access_logs").Where("scraper_id = ?", scraperID).Find(&rows).Error; err != nil { + return types.WrapErr(err) + } + raw, _ := json.Marshal(rows) + var result []any + _ = json.Unmarshal(raw, &result) + if result == nil { + result = []any{} + } + return types.DefaultTypeAdapter.NativeToValue(result) +} diff --git a/db/external_cache.go b/db/external_cache.go index 22451daaf..e561bde33 100644 --- a/db/external_cache.go +++ b/db/external_cache.go @@ -1,7 +1,6 @@ package db import ( - "errors" "fmt" "time" @@ -14,7 +13,6 @@ import ( "github.com/lib/pq" "github.com/patrickmn/go-cache" "github.com/samber/lo" - "gorm.io/gorm" ) var CACHE_TIMEOUT = properties.Duration(time.Hour*24, "external.cache.timeout") @@ -39,20 +37,6 @@ type externalEntityWithID interface { TableName() string } -// getEntityID extracts the ID from an external entity -func getEntityID[T externalEntityWithID](entity *T) uuid.UUID { - switch e := any(entity).(type) { - case *dutyModels.ExternalUser: - return e.ID - case *dutyModels.ExternalRole: - return e.ID - case *dutyModels.ExternalGroup: - return e.ID - default: - return uuid.Nil - } -} - // getEntityCache returns the appropriate cache for an external entity type func getEntityCache[T externalEntityWithID]() *cache.Cache { var zero T @@ -107,41 +91,54 @@ func WarmExternalEntityCaches(ctx context.Context) { // findExternalEntityIDByAliases looks up an external entity ID by aliases. // It first checks the cache, then queries the DB. Returns the ID if found, nil otherwise. func findExternalEntityIDByAliases[T externalEntityWithID](ctx api.ScrapeContext, aliases []string) (*uuid.UUID, error) { + ids, err := findAllExternalEntityIDsByAliases[T](ctx, aliases) + if err != nil { + return nil, err + } + if len(ids) == 0 { + return nil, nil + } + return lo.ToPtr(ids[0]), nil +} + +// findAllExternalEntityIDsByAliases returns all distinct entity IDs that share any alias with the given set. +func findAllExternalEntityIDsByAliases[T externalEntityWithID](ctx api.ScrapeContext, aliases []string) ([]uuid.UUID, error) { aliasCache := getEntityCache[T]() + seen := make(map[uuid.UUID]bool) for _, alias := range aliases { if cachedID, ok := aliasCache.Get(alias); ok { - id, valid := cachedID.(uuid.UUID) - if !valid { - continue + if id, valid := cachedID.(uuid.UUID); valid { + seen[id] = true } - return lo.ToPtr(id), nil } } - // Query DB for any matching alias - for _, alias := range aliases { - var entity T - err := ctx.DB(). - Select("id"). - Where("? = ANY(aliases)", alias). - Where("deleted_at IS NULL"). - First(&entity).Error + var zero T + var dbIDs []uuid.UUID + if err := ctx.DB().Table(zero.TableName()). + Select("DISTINCT id"). + Where("aliases && ?", pq.StringArray(aliases)). + Where("deleted_at IS NULL"). + Pluck("id", &dbIDs).Error; err != nil { + return nil, fmt.Errorf("failed to query %s by aliases: %w", zero.TableName(), err) + } - if err != nil { - if !errors.Is(err, gorm.ErrRecordNotFound) { - return nil, fmt.Errorf("failed to query %s by alias: %w", entity.TableName(), err) - } - continue - } + for _, id := range dbIDs { + seen[id] = true + } + + result := make([]uuid.UUID, 0, len(seen)) + for id := range seen { + result = append(result, id) + } - // Found in DB, populate cache for all aliases and return - id := getEntityID(&entity) + // Populate cache + for _, id := range result { for _, a := range aliases { aliasCache.Set(a, id, cache.DefaultExpiration) } - return lo.ToPtr(id), nil } - return nil, nil + return result, nil } diff --git a/db/external_entities.go b/db/external_entities.go index 27b2cd2ad..a8ac72e60 100644 --- a/db/external_entities.go +++ b/db/external_entities.go @@ -33,13 +33,13 @@ func syncExternalEntities(ctx api.ScrapeContext, extract *extractResult, scraper now := time.Now() - resolvedUsers, skippedUsers, userIDMap, err := resolveExternalUsers(ctx, extract.externalUsers, scraperID, now) + resolvedUsers, skippedUsers, err := resolveExternalUsers(ctx, extract.externalUsers, scraperID, now) if err != nil { return result, nil, err } result.Users.Skipped = skippedUsers - resolvedGroups, skippedGroups, groupIDMap, err := resolveExternalGroups(ctx, extract.externalGroups, scraperID, now) + resolvedGroups, skippedGroups, err := resolveExternalGroups(ctx, extract.externalGroups, scraperID, now) if err != nil { return result, nil, err } @@ -53,12 +53,6 @@ func syncExternalEntities(ctx api.ScrapeContext, extract *extractResult, scraper var resolvedUserGroups []dutyModels.ExternalUserGroup for _, ug := range extract.externalUserGroups { - if savedID, ok := userIDMap[ug.ExternalUserID]; ok { - ug.ExternalUserID = savedID - } - if savedID, ok := groupIDMap[ug.ExternalGroupID]; ok { - ug.ExternalGroupID = savedID - } if ug.ExternalUserID == uuid.Nil || ug.ExternalGroupID == uuid.Nil { ctx.Logger.Warnf("skipping external user group with nil user_id=%s or group_id=%s", ug.ExternalUserID, ug.ExternalGroupID) continue @@ -66,15 +60,15 @@ func syncExternalEntities(ctx api.ScrapeContext, extract *extractResult, scraper resolvedUserGroups = append(resolvedUserGroups, ug) } - counts, err := upsertExternalEntities(ctx, resolvedUsers, resolvedGroups, resolvedRoles, resolvedUserGroups, scraperID) + counts, idMap, err := upsertExternalEntities(ctx, resolvedUsers, resolvedGroups, resolvedRoles, resolvedUserGroups, scraperID) if err != nil { return result, nil, err } - // Populate caches only after transaction has committed successfully. - // When scraperID is nil, upsertExternalEntities skips insertion, - // so we must not cache IDs that don't exist in the DB. if scraperID != nil { + for loserID := range idMap { + ExternalUserIDCache.Delete(loserID.String()) + } for _, u := range resolvedUsers { ExternalUserIDCache.Set(u.ID.String(), u.ID, cache.DefaultExpiration) for _, alias := range u.Aliases { @@ -99,172 +93,106 @@ func syncExternalEntities(ctx api.ScrapeContext, extract *extractResult, scraper result.Groups.Deleted = counts.groupsDeleted result.Roles.Saved = counts.rolesSaved result.Roles.Deleted = counts.rolesDeleted - return result, userIDMap, nil + return result, idMap, nil } -func resolveExternalUsers(ctx api.ScrapeContext, users []dutyModels.ExternalUser, scraperID *uuid.UUID, now time.Time) ([]dutyModels.ExternalUser, int, map[uuid.UUID]uuid.UUID, error) { - var resolved []dutyModels.ExternalUser +func resolveExternalUsers(ctx api.ScrapeContext, users []dutyModels.ExternalUser, scraperID *uuid.UUID, now time.Time) ([]dutyModels.ExternalUser, int, error) { + var valid []dutyModels.ExternalUser var skipped int - idMap := make(map[uuid.UUID]uuid.UUID) - seen := make(map[uuid.UUID]int) // id -> index in resolved slice - for _, u := range users { + for i := range users { + u := &users[i] u.ScraperID = lo.Ternary(u.ScraperID == uuid.Nil, lo.FromPtr(scraperID), u.ScraperID) - originalID := u.ID - if u.ID == uuid.Nil && len(u.Aliases) == 0 { ctx.Logger.Warnf("skipping external user %q with no ID and no aliases", u.Name) skipped++ continue } - - if len(u.Aliases) > 0 { + if u.ID != uuid.Nil { + u.Aliases = appendUnique(u.Aliases, u.ID.String()) + } + if u.ID == uuid.Nil { sort.Strings(u.Aliases) - existingID, err := findExternalEntityIDByAliases[dutyModels.ExternalUser](ctx, u.Aliases) + hid, err := hash.DeterministicUUID(u.Aliases) if err != nil { - return nil, 0, nil, ctx.Oops().With("aliases", u.Aliases).Wrapf(err, "failed to find external user by aliases") - } - if existingID != nil { - if u.ID != uuid.Nil && u.ID != *existingID { - u.Aliases = append(u.Aliases, u.ID.String()) - } - u.ID = *existingID - } else if u.ID == uuid.Nil { - hid, err := hash.DeterministicUUID(u.Aliases) - if err != nil { - return nil, 0, nil, ctx.Oops().With("user", u.Name).Wrapf(err, "failed to generate id for external user") - } - u.ID = hid + return nil, 0, ctx.Oops().With("user", u.Name).Wrapf(err, "failed to generate id for external user") } + u.ID = hid } - if u.CreatedAt.IsZero() { u.CreatedAt = now } u.UpdatedAt = &now - - // Deduplicate by resolved ID — merge aliases into first occurrence - if idx, exists := seen[u.ID]; exists { - for _, alias := range u.Aliases { - if !slices.Contains(resolved[idx].Aliases, alias) { - resolved[idx].Aliases = append(resolved[idx].Aliases, alias) - } - } - } else { - seen[u.ID] = len(resolved) - resolved = append(resolved, u) - } - - if originalID != uuid.Nil && originalID != u.ID { - idMap[originalID] = u.ID - } + valid = append(valid, *u) } - return resolved, skipped, idMap, nil + + return valid, skipped, nil } -func resolveExternalGroups(ctx api.ScrapeContext, groups []dutyModels.ExternalGroup, scraperID *uuid.UUID, now time.Time) ([]dutyModels.ExternalGroup, int, map[uuid.UUID]uuid.UUID, error) { - var resolved []dutyModels.ExternalGroup +func resolveExternalGroups(ctx api.ScrapeContext, groups []dutyModels.ExternalGroup, scraperID *uuid.UUID, now time.Time) ([]dutyModels.ExternalGroup, int, error) { + var valid []dutyModels.ExternalGroup var skipped int - idMap := make(map[uuid.UUID]uuid.UUID) - seen := make(map[uuid.UUID]int) - for _, g := range groups { + for i := range groups { + g := &groups[i] g.ScraperID = lo.Ternary(g.ScraperID == uuid.Nil, lo.FromPtr(scraperID), g.ScraperID) - originalID := g.ID - if g.ID == uuid.Nil && len(g.Aliases) == 0 { ctx.Logger.Warnf("skipping external group %q with no ID and no aliases", g.Name) skipped++ continue } - - if len(g.Aliases) > 0 { + if g.ID != uuid.Nil { + g.Aliases = appendUnique(g.Aliases, g.ID.String()) + } + if g.ID == uuid.Nil { sort.Strings(g.Aliases) - existingID, err := findExternalEntityIDByAliases[dutyModels.ExternalGroup](ctx, g.Aliases) + hid, err := hash.DeterministicUUID(g.Aliases) if err != nil { - return nil, 0, nil, ctx.Oops().With("aliases", g.Aliases).Wrapf(err, "failed to find external group by aliases") - } - if existingID != nil { - g.ID = *existingID - } else if g.ID == uuid.Nil { - hid, err := hash.DeterministicUUID(g.Aliases) - if err != nil { - return nil, 0, nil, ctx.Oops().With("group", g.Name).Wrapf(err, "failed to generate id for external group") - } - g.ID = hid + return nil, 0, ctx.Oops().With("group", g.Name).Wrapf(err, "failed to generate id for external group") } + g.ID = hid } - if g.CreatedAt.IsZero() { g.CreatedAt = now } g.UpdatedAt = &now - - if idx, exists := seen[g.ID]; exists { - for _, alias := range g.Aliases { - if !slices.Contains(resolved[idx].Aliases, alias) { - resolved[idx].Aliases = append(resolved[idx].Aliases, alias) - } - } - } else { - seen[g.ID] = len(resolved) - resolved = append(resolved, g) - if originalID != uuid.Nil && originalID != g.ID { - idMap[originalID] = g.ID - } - } + valid = append(valid, *g) } - return resolved, skipped, idMap, nil + + return valid, skipped, nil } func resolveExternalRoles(ctx api.ScrapeContext, roles []dutyModels.ExternalRole, scraperID *uuid.UUID, now time.Time) ([]dutyModels.ExternalRole, int, error) { - var resolved []dutyModels.ExternalRole + var valid []dutyModels.ExternalRole var skipped int - seen := make(map[uuid.UUID]int) - for _, r := range roles { + for i := range roles { + r := &roles[i] r.ScraperID = lo.Ternary(r.ScraperID == nil, scraperID, r.ScraperID) - if r.ID == uuid.Nil && len(r.Aliases) == 0 { ctx.Logger.Warnf("skipping external role %q with no ID and no aliases", r.Name) skipped++ continue } - - if len(r.Aliases) > 0 { + if r.ID != uuid.Nil { + r.Aliases = appendUnique(r.Aliases, r.ID.String()) + } + if r.ID == uuid.Nil { sort.Strings(r.Aliases) - existingID, err := findExternalEntityIDByAliases[dutyModels.ExternalRole](ctx, r.Aliases) + hid, err := hash.DeterministicUUID(r.Aliases) if err != nil { - return nil, 0, ctx.Oops().With("aliases", r.Aliases).Wrapf(err, "failed to find external role by aliases") - } - if existingID != nil { - r.ID = *existingID - } else if r.ID == uuid.Nil { - hid, err := hash.DeterministicUUID(r.Aliases) - if err != nil { - return nil, 0, ctx.Oops().With("role", r.Name).Wrapf(err, "failed to generate id for external role") - } - r.ID = hid + return nil, 0, ctx.Oops().With("role", r.Name).Wrapf(err, "failed to generate id for external role") } + r.ID = hid } - if r.CreatedAt.IsZero() { r.CreatedAt = now } r.UpdatedAt = &now - - if idx, exists := seen[r.ID]; exists { - for _, alias := range r.Aliases { - if !slices.Contains(resolved[idx].Aliases, alias) { - resolved[idx].Aliases = append(resolved[idx].Aliases, alias) - } - } - } else { - seen[r.ID] = len(resolved) - resolved = append(resolved, r) - } + valid = append(valid, *r) } - return resolved, skipped, nil + + return valid, skipped, nil } type upsertCounts struct { @@ -273,6 +201,25 @@ type upsertCounts struct { rolesSaved, rolesDeleted int } +func remapExternalUserGroups(userGroups []dutyModels.ExternalUserGroup, userIDMap, groupIDMap map[uuid.UUID]uuid.UUID) []dutyModels.ExternalUserGroup { + if len(userGroups) == 0 || (len(userIDMap) == 0 && len(groupIDMap) == 0) { + return userGroups + } + + remapped := make([]dutyModels.ExternalUserGroup, len(userGroups)) + for i, ug := range userGroups { + if winner, ok := userIDMap[ug.ExternalUserID]; ok { + ug.ExternalUserID = winner + } + if winner, ok := groupIDMap[ug.ExternalGroupID]; ok { + ug.ExternalGroupID = winner + } + remapped[i] = ug + } + + return remapped +} + func upsertExternalEntities( ctx api.ScrapeContext, users []dutyModels.ExternalUser, @@ -280,23 +227,25 @@ func upsertExternalEntities( roles []dutyModels.ExternalRole, userGroups []dutyModels.ExternalUserGroup, scraperID *uuid.UUID, -) (upsertCounts, error) { +) (upsertCounts, map[uuid.UUID]uuid.UUID, error) { var counts upsertCounts + userIDMap := make(map[uuid.UUID]uuid.UUID) + groupIDMap := make(map[uuid.UUID]uuid.UUID) + if scraperID == nil { - return counts, nil + return counts, userIDMap, nil } - scraperIDStr := scraperID.String() - suffix := sanitizeForTempTable(scraperIDStr) + suffix := sanitizeForTempTable(scraperID.String()) tx := ctx.DB().Begin() if tx.Error != nil { - return counts, fmt.Errorf("failed to begin transaction: %w", tx.Error) + return counts, nil, fmt.Errorf("failed to begin transaction: %w", tx.Error) } defer func() { if r := recover(); r != nil { tx.Rollback() - panic(r) // Re-panic to propagate the error + panic(r) } }() @@ -305,104 +254,88 @@ func upsertExternalEntities( tempRoles := fmt.Sprintf("_ext_roles_%s", suffix) tempUserGroups := fmt.Sprintf("_ext_user_groups_%s", suffix) - // Create temp tables for non-empty entity slices if len(users) > 0 { if err := createTempAndInsert(tx, tempUsers, "external_users", users); err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to setup temp users: %w", err) + return counts, nil, fmt.Errorf("failed to setup temp users: %w", err) } } if len(groups) > 0 { if err := createTempAndInsert(tx, tempGroups, "external_groups", groups); err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to setup temp groups: %w", err) + return counts, nil, fmt.Errorf("failed to setup temp groups: %w", err) } } if len(roles) > 0 { if err := createTempAndInsert(tx, tempRoles, "external_roles", roles); err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to setup temp roles: %w", err) + return counts, nil, fmt.Errorf("failed to setup temp roles: %w", err) } } - if len(userGroups) > 0 { - if err := createTempAndInsert(tx, tempUserGroups, "external_user_groups", userGroups); err != nil { + // Call stored procedures that handle merge + upsert atomically + if len(users) > 0 { + var merges []struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + } + if err := tx.Raw("SELECT * FROM merge_and_upsert_external_users(?)", tempUsers).Scan(&merges).Error; err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to setup temp user groups: %w", err) + return counts, nil, fmt.Errorf("failed to merge and upsert external users: %w", err) + } + for _, m := range merges { + userIDMap[m.LoserID] = m.WinnerID } + counts.usersSaved = len(users) } - // Upsert: users - if len(users) > 0 { - r := tx.Exec(fmt.Sprintf(` - INSERT INTO external_users (id, aliases, name, account_id, user_type, email, scraper_id, created_at, updated_at, created_by) - SELECT id, aliases, name, account_id, user_type, email, scraper_id, created_at, updated_at, created_by - FROM %s - ON CONFLICT (id) DO UPDATE SET - aliases = NULLIF(ARRAY(SELECT DISTINCT unnest FROM unnest(external_users.aliases || EXCLUDED.aliases) ORDER BY 1), '{}'::text[]), - name = EXCLUDED.name, account_id = EXCLUDED.account_id, - user_type = EXCLUDED.user_type, email = EXCLUDED.email, - updated_at = EXCLUDED.updated_at, deleted_at = NULL - `, tempUsers)) - if r.Error != nil { + if len(groups) > 0 { + var merges []struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + } + if err := tx.Raw("SELECT * FROM merge_and_upsert_external_groups(?)", tempGroups).Scan(&merges).Error; err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to upsert external users: %w", r.Error) + return counts, nil, fmt.Errorf("failed to merge and upsert external groups: %w", err) + } + for _, m := range merges { + groupIDMap[m.LoserID] = m.WinnerID } - counts.usersSaved = int(r.RowsAffected) + counts.groupsSaved = len(groups) } - // Upsert: groups - if len(groups) > 0 { - r := tx.Exec(fmt.Sprintf(` - INSERT INTO external_groups (id, aliases, name, account_id, scraper_id, group_type, created_at, updated_at) - SELECT id, aliases, name, account_id, scraper_id, group_type, created_at, updated_at - FROM %s - ON CONFLICT (id) DO UPDATE SET - aliases = NULLIF(ARRAY(SELECT DISTINCT unnest FROM unnest(external_groups.aliases || EXCLUDED.aliases) ORDER BY 1), '{}'::text[]), - name = EXCLUDED.name, account_id = EXCLUDED.account_id, - group_type = EXCLUDED.group_type, - updated_at = EXCLUDED.updated_at, deleted_at = NULL - `, tempGroups)) - if r.Error != nil { + if len(roles) > 0 { + var merges []struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + } + if err := tx.Raw("SELECT * FROM merge_and_upsert_external_roles(?)", tempRoles).Scan(&merges).Error; err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to upsert external groups: %w", r.Error) + return counts, nil, fmt.Errorf("failed to merge and upsert external roles: %w", err) } - counts.groupsSaved = int(r.RowsAffected) + counts.rolesSaved = len(roles) } - // Upsert: roles - if len(roles) > 0 { - r := tx.Exec(fmt.Sprintf(` - INSERT INTO external_roles (id, aliases, name, account_id, role_type, description, scraper_id, application_id, created_at, updated_at) - SELECT id, aliases, name, account_id, role_type, description, scraper_id, application_id, created_at, updated_at - FROM %s - ON CONFLICT (id) DO UPDATE SET - aliases = NULLIF(ARRAY(SELECT DISTINCT unnest FROM unnest(external_roles.aliases || EXCLUDED.aliases) ORDER BY 1), '{}'::text[]), - name = EXCLUDED.name, account_id = EXCLUDED.account_id, - role_type = EXCLUDED.role_type, description = EXCLUDED.description, - updated_at = EXCLUDED.updated_at, deleted_at = NULL - `, tempRoles)) - if r.Error != nil { + if len(userGroups) > 0 { + remappedUserGroups := remapExternalUserGroups(userGroups, userIDMap, groupIDMap) + if err := createTempAndInsert(tx, tempUserGroups, "external_user_groups", remappedUserGroups); err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to upsert external roles: %w", r.Error) + return counts, nil, fmt.Errorf("failed to setup temp user groups: %w", err) } - counts.rolesSaved = int(r.RowsAffected) } - // Upsert: user_groups if len(userGroups) > 0 { r := tx.Exec(fmt.Sprintf(` INSERT INTO external_user_groups (external_user_id, external_group_id, created_at) - SELECT external_user_id, external_group_id, created_at - FROM %s + SELECT external_user_id, external_group_id, created_at FROM %s ON CONFLICT (external_user_id, external_group_id) DO UPDATE SET deleted_at = NULL WHERE external_user_groups.deleted_at IS NOT NULL `, tempUserGroups)) if r.Error != nil { tx.Rollback() - return counts, fmt.Errorf("failed to upsert external user groups: %w", r.Error) + return counts, nil, fmt.Errorf("failed to upsert external user groups: %w", r.Error) } } @@ -418,7 +351,7 @@ func upsertExternalEntities( AND t.external_group_id = external_user_groups.external_group_id) `, tempUserGroups), *scraperID).Error; err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to delete stale external user groups: %w", err) + return counts, nil, fmt.Errorf("failed to delete stale external user groups: %w", err) } } else if len(users) > 0 || len(groups) > 0 { if err := tx.Exec(` @@ -427,54 +360,67 @@ func upsertExternalEntities( AND external_user_id IN (SELECT id FROM external_users WHERE scraper_id = ?) `, *scraperID).Error; err != nil { tx.Rollback() - return counts, fmt.Errorf("failed to delete stale external user groups: %w", err) + return counts, nil, fmt.Errorf("failed to delete stale external user groups: %w", err) } } } - // FIXME: add stale deletion for external_users, external_groups, and external_roles - if err := tx.Commit().Error; err != nil { - return counts, fmt.Errorf("failed to commit external entities transaction: %w", err) + return counts, nil, fmt.Errorf("failed to commit external entities transaction: %w", err) } - return counts, nil + return counts, userIDMap, nil } -// ensureExternalUserFromAliases creates a minimal external user from aliases if none exists. func ensureExternalUserFromAliases(ctx api.ScrapeContext, aliases []string, scraperID *uuid.UUID) error { sort.Strings(aliases) + id, err := hash.DeterministicUUID(aliases) if err != nil { return fmt.Errorf("failed to generate deterministic UUID: %w", err) } + now := time.Now() - user := dutyModels.ExternalUser{ - ID: id, - Aliases: aliases, - ScraperID: lo.FromPtr(scraperID), - CreatedAt: now, - UpdatedAt: &now, - } - if err := ctx.DB().Exec(` - INSERT INTO external_users (id, aliases, scraper_id, account_id, created_at, updated_at) - VALUES (?, ?, ?, '', ?, ?) - ON CONFLICT (id) DO UPDATE SET - aliases = NULLIF(ARRAY(SELECT DISTINCT unnest FROM unnest(external_users.aliases || EXCLUDED.aliases) ORDER BY 1), '{}'::text[]), - updated_at = EXCLUDED.updated_at, deleted_at = NULL - `, user.ID, pq.StringArray(user.Aliases), user.ScraperID, user.CreatedAt, user.UpdatedAt).Error; err != nil { - return fmt.Errorf("failed to upsert external user: %w", err) - } - ExternalUserIDCache.Set(user.ID.String(), user.ID, cache.DefaultExpiration) + + tx := ctx.DB().Begin() + if tx.Error != nil { + return fmt.Errorf("failed to begin transaction: %w", tx.Error) + } + + tempTable := fmt.Sprintf("_ext_users_ensure_%s", sanitizeForTempTable(id.String())) + if err := tx.Exec(fmt.Sprintf(`CREATE TEMP TABLE %s (LIKE external_users INCLUDING ALL) ON COMMIT DROP`, tempTable)).Error; err != nil { + tx.Rollback() + return fmt.Errorf("failed to create temp table: %w", err) + } + + if err := tx.Exec(fmt.Sprintf(` + INSERT INTO %s (id, aliases, scraper_id, account_id, name, user_type, created_at, updated_at) + VALUES (?, ?, ?, '', '', '', ?, ?) + `, tempTable), id, pq.StringArray(aliases), lo.FromPtr(scraperID), now, now).Error; err != nil { + tx.Rollback() + return fmt.Errorf("failed to insert into temp table: %w", err) + } + + var merges []struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + } + if err := tx.Raw("SELECT * FROM merge_and_upsert_external_users(?)", tempTable).Scan(&merges).Error; err != nil { + tx.Rollback() + return fmt.Errorf("failed to merge and upsert: %w", err) + } + + if err := tx.Commit().Error; err != nil { + return fmt.Errorf("failed to commit: %w", err) + } + + ExternalUserIDCache.Set(id.String(), id, cache.DefaultExpiration) for _, alias := range aliases { - ExternalUserCache.Set(alias, user.ID, cache.DefaultExpiration) + ExternalUserCache.Set(alias, id, cache.DefaultExpiration) } return nil } -// dedupeByID merges duplicate entities (by ID) keeping the first occurrence -// and appending unique aliases from later duplicates. -// Entities with a nil ID are passed through as-is. func dedupeByID[T any]( items []T, getID func(T) uuid.UUID, @@ -512,3 +458,10 @@ func createTempAndInsert[T any](tx *gorm.DB, tempTable, sourceTable string, item } return nil } + +func appendUnique(slice []string, item string) []string { + if !slices.Contains(slice, item) { + return append(slice, item) + } + return slice +} diff --git a/db/external_entities_test.go b/db/external_entities_test.go new file mode 100644 index 000000000..a109e1f0e --- /dev/null +++ b/db/external_entities_test.go @@ -0,0 +1,43 @@ +package db + +import ( + "github.com/flanksource/duty/models" + "github.com/google/uuid" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("remapExternalUserGroups", func() { + It("remaps user and group ids independently", func() { + userLoser := uuid.MustParse("00000000-0000-0000-0000-000000000001") + userWinner := uuid.MustParse("00000000-0000-0000-0000-000000000002") + groupLoser := uuid.MustParse("00000000-0000-0000-0000-000000000003") + groupWinner := uuid.MustParse("00000000-0000-0000-0000-000000000004") + + remapped := remapExternalUserGroups([]models.ExternalUserGroup{ + { + ExternalUserID: groupLoser, + ExternalGroupID: userLoser, + }, + { + ExternalUserID: userLoser, + ExternalGroupID: groupLoser, + }, + }, map[uuid.UUID]uuid.UUID{ + userLoser: userWinner, + }, map[uuid.UUID]uuid.UUID{ + groupLoser: groupWinner, + }) + + Expect(remapped).To(Equal([]models.ExternalUserGroup{ + { + ExternalUserID: groupLoser, + ExternalGroupID: userLoser, + }, + { + ExternalUserID: userWinner, + ExternalGroupID: groupWinner, + }, + })) + }) +}) diff --git a/db/external_resolver.go b/db/external_resolver.go index 997593131..9627fbeb9 100644 --- a/db/external_resolver.go +++ b/db/external_resolver.go @@ -23,7 +23,7 @@ func NewDBResolver(ctx api.ScrapeContext) *DBResolver { func (d *DBResolver) SyncExternalUsers(users []dutyModels.ExternalUser, scraperID *uuid.UUID) ([]dutyModels.ExternalUser, map[uuid.UUID]uuid.UUID, error) { now := time.Now() - resolved, _, idMap, err := resolveExternalUsers(d.ctx, users, scraperID, now) + resolved, _, err := resolveExternalUsers(d.ctx, users, scraperID, now) if err != nil { return nil, nil, err } @@ -32,12 +32,12 @@ func (d *DBResolver) SyncExternalUsers(users []dutyModels.ExternalUser, scraperI ExternalUserCache.Set(alias, u.ID, cache.DefaultExpiration) } } - return resolved, idMap, nil + return resolved, nil, nil } func (d *DBResolver) SyncExternalGroups(groups []dutyModels.ExternalGroup, scraperID *uuid.UUID) ([]dutyModels.ExternalGroup, map[uuid.UUID]uuid.UUID, error) { now := time.Now() - resolved, _, idMap, err := resolveExternalGroups(d.ctx, groups, scraperID, now) + resolved, _, err := resolveExternalGroups(d.ctx, groups, scraperID, now) if err != nil { return nil, nil, err } @@ -46,7 +46,7 @@ func (d *DBResolver) SyncExternalGroups(groups []dutyModels.ExternalGroup, scrap ExternalGroupCache.Set(alias, g.ID, cache.DefaultExpiration) } } - return resolved, idMap, nil + return resolved, nil, nil } func (d *DBResolver) SyncExternalRoles(roles []dutyModels.ExternalRole, scraperID *uuid.UUID) ([]dutyModels.ExternalRole, error) { diff --git a/fixtures/data/external_entities_no_merge.json b/fixtures/data/external_entities_no_merge.json new file mode 100644 index 000000000..c50d6245c --- /dev/null +++ b/fixtures/data/external_entities_no_merge.json @@ -0,0 +1,17 @@ +{ + "id": "no-merge-org", + "external_users": [ + { + "name": "Distinct User A", + "account_id": "org-no-merge", + "user_type": "human", + "aliases": ["distinct-a1", "distinct-a2"] + }, + { + "name": "Distinct User B", + "account_id": "org-no-merge", + "user_type": "human", + "aliases": ["distinct-b1", "distinct-b2"] + } + ] +} diff --git a/fixtures/data/external_groups_merge_bridge.json b/fixtures/data/external_groups_merge_bridge.json new file mode 100644 index 000000000..ed413b0e5 --- /dev/null +++ b/fixtures/data/external_groups_merge_bridge.json @@ -0,0 +1,11 @@ +{ + "id": "merge-bridge-groups-org", + "external_groups": [ + { + "name": "Bridge Group", + "account_id": "org-merge", + "group_type": "security", + "aliases": ["grp-alias-b", "grp-alias-c"] + } + ] +} diff --git a/fixtures/data/external_roles_merge_bridge.json b/fixtures/data/external_roles_merge_bridge.json new file mode 100644 index 000000000..496f8ddb6 --- /dev/null +++ b/fixtures/data/external_roles_merge_bridge.json @@ -0,0 +1,11 @@ +{ + "id": "merge-bridge-roles-org", + "external_roles": [ + { + "name": "Bridge Role", + "account_id": "org-merge", + "role_type": "security", + "aliases": ["role-alias-b", "role-alias-c"] + } + ] +} diff --git a/fixtures/data/external_users_merge_batch_transitive.json b/fixtures/data/external_users_merge_batch_transitive.json new file mode 100644 index 000000000..c293c4e47 --- /dev/null +++ b/fixtures/data/external_users_merge_batch_transitive.json @@ -0,0 +1,23 @@ +{ + "id": "transitive-merge-org", + "external_users": [ + { + "name": "User A", + "account_id": "org-transitive", + "user_type": "human", + "aliases": ["chain-1", "chain-2"] + }, + { + "name": "User B", + "account_id": "org-transitive", + "user_type": "human", + "aliases": ["chain-2", "chain-3"] + }, + { + "name": "User C", + "account_id": "org-transitive", + "user_type": "human", + "aliases": ["chain-3", "chain-4"] + } + ] +} diff --git a/fixtures/data/external_users_merge_bridge.json b/fixtures/data/external_users_merge_bridge.json new file mode 100644 index 000000000..a81fc849f --- /dev/null +++ b/fixtures/data/external_users_merge_bridge.json @@ -0,0 +1,12 @@ +{ + "id": "merge-bridge-org", + "external_users": [ + { + "name": "Bridge User", + "account_id": "org-merge", + "user_type": "human", + "email": "bridge@example.com", + "aliases": ["alias-b", "alias-c"] + } + ] +} diff --git a/go.mod b/go.mod index 0372a1610..b9b80ff7f 100644 --- a/go.mod +++ b/go.mod @@ -1,17 +1,17 @@ module github.com/flanksource/config-db -go 1.25.1 +go 1.25.6 require ( github.com/flanksource/artifacts v1.0.21 github.com/flanksource/clicky v1.19.0 github.com/flanksource/commons v1.48.0 github.com/flanksource/deps v1.0.24 - github.com/flanksource/duty v1.0.1214 + github.com/flanksource/duty v1.0.1218 github.com/flanksource/gomplate/v3 v3.24.71 github.com/flanksource/is-healthy v1.0.84 github.com/flanksource/ketall v1.1.9 - github.com/flanksource/kopper v1.0.13 + github.com/flanksource/kopper v1.0.18 ) require ( @@ -157,12 +157,10 @@ require ( github.com/aws/aws-sdk-go-v2/service/kms v1.49.5 // indirect github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect - github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/bmatcuk/doublestar v1.3.4 // indirect github.com/bmatcuk/doublestar/v4 v4.10.0 // indirect - github.com/buger/jsonparser v1.1.1 // indirect github.com/casbin/casbin/v2 v2.135.0 // indirect github.com/casbin/casbin/v3 v3.8.1 // indirect github.com/casbin/gorm-adapter/v3 v3.41.0 // indirect @@ -253,7 +251,6 @@ require ( github.com/hashicorp/hcl/v2 v2.24.0 // indirect github.com/henvic/httpretty v0.1.4 // indirect github.com/hirochachacha/go-smb2 v1.1.0 // indirect - github.com/invopop/jsonschema v0.13.0 // indirect github.com/itchyny/gojq v0.12.18 // indirect github.com/itchyny/timefmt-go v0.1.7 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect @@ -285,7 +282,6 @@ require ( github.com/lrita/cmap v0.0.0-20231108122212-cb084a67f554 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect - github.com/mailru/easyjson v0.9.1 // indirect github.com/microsoft/kiota-abstractions-go v1.9.3 // indirect github.com/microsoft/kiota-authentication-azure-go v1.3.1 // indirect github.com/microsoft/kiota-http-go v1.5.4 // indirect @@ -347,12 +343,8 @@ require ( github.com/vadimi/go-http-ntlm v1.0.3 // indirect github.com/vadimi/go-http-ntlm/v2 v2.5.0 // indirect github.com/vadimi/go-ntlm v1.2.1 // indirect - github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect - github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect - github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect - github.com/xeipuuv/gojsonschema v1.2.0 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect github.com/xuri/efp v0.0.1 // indirect @@ -513,7 +505,7 @@ require ( // replace github.com/flanksource/deps => ../deps -// replace github.com/flanksource/duty => ../duty +replace github.com/flanksource/duty => ../duty // replace github.com/flanksource/gomplate => ../gomplate diff --git a/go.sum b/go.sum index 3d5b53108..7932b1450 100644 --- a/go.sum +++ b/go.sum @@ -330,8 +330,6 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiE github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8= github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA= -github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= -github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -348,8 +346,6 @@ github.com/bmatcuk/doublestar/v4 v4.10.0 h1:zU9WiOla1YA122oLM6i4EXvGW62DvKZVxIe6 github.com/bmatcuk/doublestar/v4 v4.10.0/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= -github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= -github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cactus/go-statsd-client/statsd v0.0.0-20200423205355-cb0885a1018c/go.mod h1:l/bIBLeOl9eX+wxJAzxS4TveKRtAqlyDpHjhkfO0MEI= github.com/casbin/casbin/v2 v2.135.0 h1:6BLkMQiGotYyS5yYeWgW19vxqugUlvHFkFiLnLR/bxk= github.com/casbin/casbin/v2 v2.135.0/go.mod h1:FmcfntdXLTcYXv/hxgNntcRPqAbwOG9xsism0yXT+18= @@ -476,16 +472,14 @@ github.com/flanksource/commons v1.48.0 h1:ztUu9DuOzaMuJNMgJCQUe6YpJw0CPBX9dyf770 github.com/flanksource/commons v1.48.0/go.mod h1:MeUQTNCzh5WhSwpbE1mdV1Atri1srMgIufotYRuZn14= github.com/flanksource/deps v1.0.24 h1:X23SZb2nxCDsS1wRiuqyvUYpA3KQxcQR9YfB8H/oTgo= github.com/flanksource/deps v1.0.24/go.mod h1:emsZRgkplqo9xe2wTAPyFm6XWk8CS6H9cxjp6eYR1Vg= -github.com/flanksource/duty v1.0.1214 h1:PIHBKw5PtV1X2cTZGCHI6VcGIbpNRCNtu/+Gpx8f5Nk= -github.com/flanksource/duty v1.0.1214/go.mod h1:wDp4j7vmd+VGgryREmzJdS90sgqa4ICNT/kx5eVj2sc= github.com/flanksource/gomplate/v3 v3.24.71 h1:c610TQ+YEhA39bJIl5wiG3ynWxyrPlbgu4BoCHxlJfo= github.com/flanksource/gomplate/v3 v3.24.71/go.mod h1:PMJGo4K81b0TB0FrC8WJDE9+vW3X/zWHQ8eGpCMYXmo= github.com/flanksource/is-healthy v1.0.84 h1:d05/Uri+y3b6AkQAItxwjwLwHq2BZHXER42n9AtRbzo= github.com/flanksource/is-healthy v1.0.84/go.mod h1:6/KOjVUeevIbaIaVtSDhy6qsnbqyy5WPZNlRGQCBxcw= github.com/flanksource/ketall v1.1.9 h1:9AaQ6VFDYMGqQzsyZJcXMo5qgHCJHNi1IqAK8tIxS+k= github.com/flanksource/ketall v1.1.9/go.mod h1:7p2N6gm5kUTMA+21x4bdnfWupOPVxUW0jy7zksV6GUU= -github.com/flanksource/kopper v1.0.13 h1:BKSGqQG+7omkte9Xq/nUJVLn80T+Ck6qxbQKyoNl6nk= -github.com/flanksource/kopper v1.0.13/go.mod h1:jk5JN4raPsIsANlkk7ZOFmsJ8J0yxTxaUD3SEJFLwZE= +github.com/flanksource/kopper v1.0.18 h1:AV6mCnn+ANRvPzduoF2pbEShjiR0GOLT24xCD+rjysk= +github.com/flanksource/kopper v1.0.18/go.mod h1:n1BAjij7zsX2zcwjSC1mvNkFlwPSkk4QHWg7n32eg0E= github.com/flanksource/kubectl-neat v1.0.4 h1:t5/9CqgE84oEtB0KitgJ2+WIeLfD+RhXSxYrqb4X8yI= github.com/flanksource/kubectl-neat v1.0.4/go.mod h1:Un/Voyh3cmiZNKQrW/TkAl28nAA7vwnwDGVjRErKjOw= github.com/flanksource/sandbox-runtime v1.0.1 h1:zBzNx9GoZILo1ot4qI2wd/gqny0vejvex3xnJzsmvgE= @@ -778,8 +772,6 @@ github.com/hirochachacha/go-smb2 v1.1.0/go.mod h1:8F1A4d5EZzrGu5R7PU163UcMRDJQl4 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= -github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= github.com/itchyny/gojq v0.12.18 h1:gFGHyt/MLbG9n6dqnvlliiya2TaMMh6FFaR2b1H6Drc= github.com/itchyny/gojq v0.12.18/go.mod h1:4hPoZ/3lN9fDL1D+aK7DY1f39XZpY9+1Xpjz8atrEkg= github.com/itchyny/timefmt-go v0.1.7 h1:xyftit9Tbw+Dc/huSSPJaEmX1TVL8lw5vxjJLK4GMMA= @@ -944,8 +936,6 @@ github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIi github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8= -github.com/mailru/easyjson v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= @@ -1259,8 +1249,6 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= -github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= -github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= @@ -1268,12 +1256,6 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= -github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xhit/go-str2duration v1.2.0/go.mod h1:3cPSlfZlUHVlneIVfePFWcJZsuwf+P1v2SRTV4cUmp4= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= diff --git a/scrapers/external_entity_merge_test.go b/scrapers/external_entity_merge_test.go new file mode 100644 index 000000000..5efdd24fd --- /dev/null +++ b/scrapers/external_entity_merge_test.go @@ -0,0 +1,165 @@ +package scrapers + +import ( + "fmt" + "strings" + "time" + + "github.com/flanksource/config-db/db/models" + dutymodels "github.com/flanksource/duty/models" + "github.com/google/uuid" + "github.com/lib/pq" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + k8sTypes "k8s.io/apimachinery/pkg/types" +) + +var _ = Describe("external entity merges", func() { + It("merges user dependents without violating unique constraints", func() { + scrapeConfig := getConfigSpec("file-config-access") + + scraperModel, err := scrapeConfig.ToModel() + Expect(err).NotTo(HaveOccurred()) + scraperModel.Source = dutymodels.SourceUI + Expect(DefaultContext.DB().Create(&scraperModel).Error).NotTo(HaveOccurred()) + scrapeConfig.SetUID(k8sTypes.UID(scraperModel.ID.String())) + + configID := uuid.New() + configItem := models.ConfigItem{ + ID: configID.String(), + ScraperID: &scraperModel.ID, + ConfigClass: "Test", + Type: "Config", + ExternalID: pq.StringArray{"merge-collision"}, + } + Expect(DefaultContext.DB().Create(&configItem).Error).NotTo(HaveOccurred()) + + now := time.Now().UTC().Truncate(time.Microsecond) + winnerID := uuid.MustParse("00000000-0000-0000-0000-000000000001") + loserID := uuid.MustParse("00000000-0000-0000-0000-000000000002") + bridgeID := uuid.MustParse("00000000-0000-0000-0000-000000000003") + + winner := dutymodels.ExternalUser{ + ID: winnerID, + Name: "winner", + Aliases: pq.StringArray{"winner-alias"}, + UserType: "user", + ScraperID: scraperModel.ID, + CreatedAt: now, + UpdatedAt: lo.ToPtr(now), + } + loser := dutymodels.ExternalUser{ + ID: loserID, + Name: "loser", + Aliases: pq.StringArray{"loser-alias"}, + UserType: "user", + ScraperID: scraperModel.ID, + CreatedAt: now, + UpdatedAt: lo.ToPtr(now), + } + Expect(DefaultContext.DB().Create(&winner).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Create(&loser).Error).NotTo(HaveOccurred()) + + source := "merge-test" + winnerAccess := dutymodels.ConfigAccess{ + ID: "winner-access", + ConfigID: configID, + ExternalUserID: &winnerID, + ScraperID: &scraperModel.ID, + Source: &source, + CreatedAt: now.Add(-2 * time.Hour), + } + loserAccess := dutymodels.ConfigAccess{ + ID: "loser-access", + ConfigID: configID, + ExternalUserID: &loserID, + ScraperID: &scraperModel.ID, + Source: &source, + CreatedAt: now.Add(-time.Hour), + } + Expect(DefaultContext.DB().Create(&winnerAccess).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Create(&loserAccess).Error).NotTo(HaveOccurred()) + + winnerLogCount := 2 + loserLogCount := 3 + winnerLog := dutymodels.ConfigAccessLog{ + ConfigID: configID, + ExternalUserID: winnerID, + ScraperID: scraperModel.ID, + CreatedAt: now.Add(-2 * time.Hour), + Count: &winnerLogCount, + } + loserLog := dutymodels.ConfigAccessLog{ + ConfigID: configID, + ExternalUserID: loserID, + ScraperID: scraperModel.ID, + CreatedAt: now.Add(-30 * time.Minute), + MFA: true, + Count: &loserLogCount, + } + Expect(DefaultContext.DB().Create(&winnerLog).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Create(&loserLog).Error).NotTo(HaveOccurred()) + + defer func() { + Expect(DefaultContext.DB().Where("config_id = ? AND scraper_id = ?", configID, scraperModel.ID).Delete(&dutymodels.ConfigAccessLog{}).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Where("config_id = ? AND scraper_id = ?", configID, scraperModel.ID).Delete(&dutymodels.ConfigAccess{}).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Where("scraper_id = ?", scraperModel.ID).Delete(&dutymodels.ExternalUser{}).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Delete(&models.ConfigItem{}, "id = ?", configItem.ID).Error).NotTo(HaveOccurred()) + Expect(DefaultContext.DB().Delete(&scraperModel).Error).NotTo(HaveOccurred()) + }() + + tx := DefaultContext.DB().Begin() + Expect(tx.Error).NotTo(HaveOccurred()) + + tempTable := fmt.Sprintf("_merge_users_%s", strings.ReplaceAll(uuid.NewString(), "-", "_")) + Expect(tx.Exec(fmt.Sprintf(`CREATE TEMP TABLE %s (LIKE external_users INCLUDING ALL) ON COMMIT DROP`, tempTable)).Error).NotTo(HaveOccurred()) + + bridge := dutymodels.ExternalUser{ + ID: bridgeID, + Name: "bridge", + Aliases: pq.StringArray{"winner-alias", "loser-alias"}, + UserType: "user", + ScraperID: scraperModel.ID, + CreatedAt: now, + UpdatedAt: lo.ToPtr(now), + } + Expect(tx.Table(tempTable).Create(&bridge).Error).NotTo(HaveOccurred()) + + var merges []struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + } + Expect(tx.Raw("SELECT * FROM merge_and_upsert_external_users(?)", tempTable).Scan(&merges).Error).NotTo(HaveOccurred()) + Expect(tx.Commit().Error).NotTo(HaveOccurred()) + + Expect(merges).To(ContainElement(struct { + LoserID uuid.UUID `gorm:"column:loser_id"` + WinnerID uuid.UUID `gorm:"column:winner_id"` + }{ + LoserID: loserID, + WinnerID: winnerID, + })) + + var activeAccesses []dutymodels.ConfigAccess + Expect(DefaultContext.DB(). + Where("config_id = ? AND deleted_at IS NULL", configID). + Find(&activeAccesses).Error).NotTo(HaveOccurred()) + Expect(activeAccesses).To(HaveLen(1)) + Expect(activeAccesses[0].ExternalUserID).NotTo(BeNil()) + Expect(*activeAccesses[0].ExternalUserID).To(Equal(winnerID)) + + var logs []dutymodels.ConfigAccessLog + Expect(DefaultContext.DB(). + Where("config_id = ? AND scraper_id = ?", configID, scraperModel.ID). + Find(&logs).Error).NotTo(HaveOccurred()) + Expect(logs).To(HaveLen(1)) + Expect(logs[0].ExternalUserID).To(Equal(winnerID)) + Expect(lo.FromPtr(logs[0].Count)).To(Equal(winnerLogCount + loserLogCount)) + Expect(logs[0].CreatedAt.Equal(loserLog.CreatedAt)).To(BeTrue()) + + var mergedLoser dutymodels.ExternalUser + Expect(DefaultContext.DB().First(&mergedLoser, "id = ?", loserID).Error).NotTo(HaveOccurred()) + Expect(mergedLoser.DeletedAt).NotTo(BeNil()) + }) +}) diff --git a/scrapers/extract/testdata/e2e/external_entities_no_merge.yaml b/scrapers/extract/testdata/e2e/external_entities_no_merge.yaml new file mode 100644 index 000000000..b2ca12915 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_entities_no_merge.yaml @@ -0,0 +1,11 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_entities_no_merge.json +assertions: + # 2 users with completely disjoint aliases should stay separate + - size(db.external_users(scraper_id)) == 2 diff --git a/scrapers/extract/testdata/e2e/external_groups_merge_bridge.yaml b/scrapers/extract/testdata/e2e/external_groups_merge_bridge.yaml new file mode 100644 index 000000000..13dce7ef9 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_groups_merge_bridge.yaml @@ -0,0 +1,23 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_groups_merge_bridge.json +pre_populate: + external_groups: + - name: "Group A" + aliases: ["grp-alias-a", "grp-alias-b"] + group_type: security + account_id: org-merge + - name: "Group C" + aliases: ["grp-alias-c", "grp-alias-d"] + group_type: security + account_id: org-merge +assertions: + # Bridge group shares grp-alias-b with Group A and grp-alias-c with Group C + - size(db.external_groups(scraper_id)) == 1 + - db.external_groups(scraper_id)[0].aliases.contains("grp-alias-a") + - db.external_groups(scraper_id)[0].aliases.contains("grp-alias-d") diff --git a/scrapers/extract/testdata/e2e/external_groups_merge_config_access_collision.yaml b/scrapers/extract/testdata/e2e/external_groups_merge_config_access_collision.yaml new file mode 100644 index 000000000..e16c23c54 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_groups_merge_config_access_collision.yaml @@ -0,0 +1,38 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_groups_merge_bridge.json +pre_populate: + configs: + - config_class: Organization + type: Organization + external_id: ["merge-bridge-groups-org"] + config: '{"name":"Merge Bridge Groups Org"}' + external_groups: + - name: "Group A" + aliases: ["grp-alias-a", "grp-alias-b"] + group_type: security + account_id: org-merge + - name: "Group C" + aliases: ["grp-alias-c", "grp-alias-d"] + group_type: security + account_id: org-merge + config_access: + - id: "group-access-winner" + config_external_id: "merge-bridge-groups-org" + external_group_aliases: ["grp-alias-a"] + source: fixture + created_at_offset_mins: -120 + - id: "group-access-loser" + config_external_id: "merge-bridge-groups-org" + external_group_aliases: ["grp-alias-d"] + source: fixture + created_at_offset_mins: -60 +assertions: + - size(db.external_groups(scraper_id)) == 1 + - size(db.config_access(scraper_id)) == 1 + - db.config_access(scraper_id)[0].external_group_id == db.external_groups(scraper_id)[0].id diff --git a/scrapers/extract/testdata/e2e/external_roles_merge_config_access_collision.yaml b/scrapers/extract/testdata/e2e/external_roles_merge_config_access_collision.yaml new file mode 100644 index 000000000..712721214 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_roles_merge_config_access_collision.yaml @@ -0,0 +1,38 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_roles_merge_bridge.json +pre_populate: + configs: + - config_class: Organization + type: Organization + external_id: ["merge-bridge-roles-org"] + config: '{"name":"Merge Bridge Roles Org"}' + external_roles: + - name: "Role A" + aliases: ["role-alias-a", "role-alias-b"] + role_type: security + account_id: org-merge + - name: "Role C" + aliases: ["role-alias-c", "role-alias-d"] + role_type: security + account_id: org-merge + config_access: + - id: "role-access-winner" + config_external_id: "merge-bridge-roles-org" + external_role_aliases: ["role-alias-a"] + source: fixture + created_at_offset_mins: -120 + - id: "role-access-loser" + config_external_id: "merge-bridge-roles-org" + external_role_aliases: ["role-alias-d"] + source: fixture + created_at_offset_mins: -60 +assertions: + - size(db.external_roles(scraper_id)) == 1 + - size(db.config_access(scraper_id)) == 1 + - db.config_access(scraper_id)[0].external_role_id == db.external_roles(scraper_id)[0].id diff --git a/scrapers/extract/testdata/e2e/external_users_merge_access_logs_collision.yaml b/scrapers/extract/testdata/e2e/external_users_merge_access_logs_collision.yaml new file mode 100644 index 000000000..7a462e27d --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_users_merge_access_logs_collision.yaml @@ -0,0 +1,40 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_users_merge_bridge.json +pre_populate: + configs: + - config_class: Organization + type: Organization + external_id: ["merge-bridge-org"] + config: '{"name":"Merge Bridge Org"}' + external_users: + - name: "User A" + aliases: ["alias-a", "alias-b"] + user_type: human + account_id: org-merge + - name: "User C" + aliases: ["alias-c", "alias-d"] + user_type: human + account_id: org-merge + config_access_logs: + - config_external_id: "merge-bridge-org" + external_user_aliases: ["alias-a"] + count: 2 + created_at_offset_mins: -30 + - config_external_id: "merge-bridge-org" + external_user_aliases: ["alias-d"] + count: 3 + properties: + note: "merged" + created_at_offset_mins: 0 +assertions: + - size(db.external_users(scraper_id)) == 1 + - size(db.config_access_logs(scraper_id)) == 1 + - db.config_access_logs(scraper_id)[0].external_user_id == db.external_users(scraper_id)[0].id + - db.config_access_logs(scraper_id)[0].count == 5 + - db.config_access_logs(scraper_id)[0].properties['note'] == 'merged' diff --git a/scrapers/extract/testdata/e2e/external_users_merge_batch_transitive.yaml b/scrapers/extract/testdata/e2e/external_users_merge_batch_transitive.yaml new file mode 100644 index 000000000..1ac16e428 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_users_merge_batch_transitive.yaml @@ -0,0 +1,14 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_users_merge_batch_transitive.json +assertions: + # 3 users in batch with transitive alias chain: A(1,2) B(2,3) C(3,4) + # All share aliases transitively, should merge into 1 + - size(db.external_users(scraper_id)) == 1 + - db.external_users(scraper_id)[0].aliases.contains("chain-1") + - db.external_users(scraper_id)[0].aliases.contains("chain-4") diff --git a/scrapers/extract/testdata/e2e/external_users_merge_bridge.yaml b/scrapers/extract/testdata/e2e/external_users_merge_bridge.yaml new file mode 100644 index 000000000..a7e46949a --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_users_merge_bridge.yaml @@ -0,0 +1,26 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_users_merge_bridge.json +pre_populate: + external_users: + - name: "User A" + aliases: ["alias-a", "alias-b"] + user_type: human + account_id: org-merge + - name: "User C" + aliases: ["alias-c", "alias-d"] + user_type: human + account_id: org-merge +assertions: + # Bridge user shares alias-b with User A and alias-c with User C + # After merge: 1 surviving user with all aliases, 1 soft-deleted + - size(db.external_users(scraper_id)) == 1 + - db.external_users(scraper_id)[0].aliases.contains("alias-a") + - db.external_users(scraper_id)[0].aliases.contains("alias-b") + - db.external_users(scraper_id)[0].aliases.contains("alias-c") + - db.external_users(scraper_id)[0].aliases.contains("alias-d") diff --git a/scrapers/extract/testdata/e2e/external_users_merge_config_access_collision.yaml b/scrapers/extract/testdata/e2e/external_users_merge_config_access_collision.yaml new file mode 100644 index 000000000..1388d60a8 --- /dev/null +++ b/scrapers/extract/testdata/e2e/external_users_merge_config_access_collision.yaml @@ -0,0 +1,38 @@ +spec: + full: true + file: + - type: Organization + class: Organization + id: $.id + paths: + - fixtures/data/external_users_merge_bridge.json +pre_populate: + configs: + - config_class: Organization + type: Organization + external_id: ["merge-bridge-org"] + config: '{"name":"Merge Bridge Org"}' + external_users: + - name: "User A" + aliases: ["alias-a", "alias-b"] + user_type: human + account_id: org-merge + - name: "User C" + aliases: ["alias-c", "alias-d"] + user_type: human + account_id: org-merge + config_access: + - id: "user-access-winner" + config_external_id: "merge-bridge-org" + external_user_aliases: ["alias-a"] + source: fixture + created_at_offset_mins: -120 + - id: "user-access-loser" + config_external_id: "merge-bridge-org" + external_user_aliases: ["alias-d"] + source: fixture + created_at_offset_mins: -60 +assertions: + - size(db.external_users(scraper_id)) == 1 + - size(db.config_access(scraper_id)) == 1 + - db.config_access(scraper_id)[0].external_user_id == db.external_users(scraper_id)[0].id diff --git a/scrapers/extract_e2e_test.go b/scrapers/extract_e2e_test.go index 6d99364db..8aafef01f 100644 --- a/scrapers/extract_e2e_test.go +++ b/scrapers/extract_e2e_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "time" v1 "github.com/flanksource/config-db/api/v1" "github.com/flanksource/config-db/db" @@ -12,10 +13,12 @@ import ( dutymodels "github.com/flanksource/duty/models" "github.com/flanksource/gomplate/v3" "github.com/google/uuid" + "github.com/lib/pq" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" "gopkg.in/yaml.v3" + k8sTypes "k8s.io/apimachinery/pkg/types" ) type e2ePrePopulateConfig struct { @@ -25,14 +28,61 @@ type e2ePrePopulateConfig struct { Config string `yaml:"config"` } +type e2ePrePopulateExternalUser struct { + Name string `yaml:"name"` + Aliases []string `yaml:"aliases"` + Email string `yaml:"email,omitempty"` + UserType string `yaml:"user_type,omitempty"` + AccountID string `yaml:"account_id,omitempty"` +} + +type e2ePrePopulateExternalGroup struct { + Name string `yaml:"name"` + Aliases []string `yaml:"aliases"` + GroupType string `yaml:"group_type,omitempty"` + AccountID string `yaml:"account_id,omitempty"` +} + +type e2ePrePopulateExternalRole struct { + Name string `yaml:"name"` + Aliases []string `yaml:"aliases"` + RoleType string `yaml:"role_type,omitempty"` + AccountID string `yaml:"account_id,omitempty"` + Description string `yaml:"description,omitempty"` +} + +type e2ePrePopulateConfigAccess struct { + ID string `yaml:"id"` + ConfigExternalID string `yaml:"config_external_id"` + ExternalUserAliases []string `yaml:"external_user_aliases,omitempty"` + ExternalGroupAliases []string `yaml:"external_group_aliases,omitempty"` + ExternalRoleAliases []string `yaml:"external_role_aliases,omitempty"` + Source string `yaml:"source,omitempty"` + CreatedAtOffsetMins int `yaml:"created_at_offset_mins,omitempty"` +} + +type e2ePrePopulateConfigAccessLog struct { + ConfigExternalID string `yaml:"config_external_id"` + ExternalUserAliases []string `yaml:"external_user_aliases"` + Count int `yaml:"count,omitempty"` + MFA *bool `yaml:"mfa,omitempty"` + Properties map[string]any `yaml:"properties,omitempty"` + CreatedAtOffsetMins int `yaml:"created_at_offset_mins,omitempty"` +} + type e2ePrePopulate struct { - Configs []e2ePrePopulateConfig `yaml:"configs"` + Configs []e2ePrePopulateConfig `yaml:"configs"` + ExternalUsers []e2ePrePopulateExternalUser `yaml:"external_users"` + ExternalGroups []e2ePrePopulateExternalGroup `yaml:"external_groups"` + ExternalRoles []e2ePrePopulateExternalRole `yaml:"external_roles"` + ConfigAccess []e2ePrePopulateConfigAccess `yaml:"config_access"` + ConfigAccessLogs []e2ePrePopulateConfigAccessLog `yaml:"config_access_logs"` } type e2eFixture struct { - Spec map[string]any `yaml:"spec"` - PrePopulate e2ePrePopulate `yaml:"pre_populate"` - Assertions []string `yaml:"assertions"` + Spec map[string]any `yaml:"spec"` + PrePopulate e2ePrePopulate `yaml:"pre_populate"` + Assertions []string `yaml:"assertions"` } var _ = Describe("e2e extraction fixtures", func() { @@ -45,7 +95,6 @@ var _ = Describe("e2e extraction fixtures", func() { name := filepath.Base(fixturePath) It("e2e fixture: "+name, func() { - // At test time CWD is repo root (BeforeSuite does os.Chdir("..")) data, err := os.ReadFile("scrapers/" + fixturePath) Expect(err).ToNot(HaveOccurred()) @@ -54,7 +103,6 @@ var _ = Describe("e2e extraction fixtures", func() { Expect(fixture.Spec).ToNot(BeNil(), "e2e fixture %s must have a spec field", name) Expect(fixture.Assertions).ToNot(BeEmpty(), "fixture %s has no assertions", name) - // Validate spec has no unknown fields specJSON, err := json.Marshal(fixture.Spec) Expect(err).ToNot(HaveOccurred()) decoder := json.NewDecoder(bytes.NewReader(specJSON)) @@ -62,7 +110,6 @@ var _ = Describe("e2e extraction fixtures", func() { var specValidation v1.ScraperSpec Expect(decoder.Decode(&specValidation)).To(Succeed(), "spec in %s contains unknown fields", name) - // Build ScrapeConfig YAML from spec scrapeConfigYAML := buildScrapeConfigYAML(name, fixture.Spec) tmpFile, err := os.CreateTemp("", "e2e-fixture-*.yaml") Expect(err).ToNot(HaveOccurred()) @@ -76,10 +123,11 @@ var _ = Describe("e2e extraction fixtures", func() { Expect(configs).ToNot(BeEmpty()) config := configs[0] - // Pre-populate configs in DB var createdItems []string + configIDByExternalID := make(map[string]uuid.UUID) scraperModel, err := db.PersistScrapeConfigFromFile(DefaultContext, config) Expect(err).ToNot(HaveOccurred()) + config.SetUID(k8sTypes.UID(scraperModel.ID.String())) for _, preConfig := range fixture.PrePopulate.Configs { ci := &models.ConfigItem{ @@ -92,18 +140,128 @@ var _ = Describe("e2e extraction fixtures", func() { } Expect(DefaultContext.DB().Create(ci).Error).ToNot(HaveOccurred()) createdItems = append(createdItems, ci.ID) + for _, externalID := range preConfig.ExternalID { + configIDByExternalID[externalID] = uuid.MustParse(ci.ID) + } + } + + // Pre-populate external entities + now := time.Now() + userIDByAlias := make(map[string]uuid.UUID) + groupIDByAlias := make(map[string]uuid.UUID) + roleIDByAlias := make(map[string]uuid.UUID) + for _, u := range fixture.PrePopulate.ExternalUsers { + eu := dutymodels.ExternalUser{ + ID: uuid.New(), + Name: u.Name, + Aliases: pq.StringArray(u.Aliases), + UserType: u.UserType, + Tenant: u.AccountID, + Email: lo.Ternary(u.Email != "", &u.Email, nil), + ScraperID: scraperModel.ID, + CreatedAt: now, + UpdatedAt: &now, + } + Expect(DefaultContext.DB().Create(&eu).Error).ToNot(HaveOccurred()) + for _, alias := range u.Aliases { + userIDByAlias[alias] = eu.ID + } + } + for _, g := range fixture.PrePopulate.ExternalGroups { + eg := dutymodels.ExternalGroup{ + ID: uuid.New(), + Name: g.Name, + Aliases: pq.StringArray(g.Aliases), + GroupType: g.GroupType, + Tenant: g.AccountID, + ScraperID: scraperModel.ID, + CreatedAt: now, + UpdatedAt: &now, + } + Expect(DefaultContext.DB().Create(&eg).Error).ToNot(HaveOccurred()) + for _, alias := range g.Aliases { + groupIDByAlias[alias] = eg.ID + } + } + for _, r := range fixture.PrePopulate.ExternalRoles { + er := dutymodels.ExternalRole{ + ID: uuid.New(), + Name: r.Name, + Aliases: pq.StringArray(r.Aliases), + RoleType: r.RoleType, + Tenant: r.AccountID, + Description: r.Description, + ScraperID: &scraperModel.ID, + CreatedAt: now, + UpdatedAt: &now, + } + Expect(DefaultContext.DB().Create(&er).Error).ToNot(HaveOccurred()) + for _, alias := range r.Aliases { + roleIDByAlias[alias] = er.ID + } + } + + for _, ca := range fixture.PrePopulate.ConfigAccess { + row := dutymodels.ConfigAccess{ + ID: ca.ID, + ConfigID: configIDByExternalID[ca.ConfigExternalID], + ScraperID: &scraperModel.ID, + CreatedAt: now.Add(time.Duration(ca.CreatedAtOffsetMins) * time.Minute), + } + if len(ca.ExternalUserAliases) > 0 { + id, ok := userIDByAlias[ca.ExternalUserAliases[0]] + Expect(ok).To(BeTrue(), "missing pre-populated external user alias %s", ca.ExternalUserAliases[0]) + row.ExternalUserID = &id + } + if len(ca.ExternalGroupAliases) > 0 { + id, ok := groupIDByAlias[ca.ExternalGroupAliases[0]] + Expect(ok).To(BeTrue(), "missing pre-populated external group alias %s", ca.ExternalGroupAliases[0]) + row.ExternalGroupID = &id + } + if len(ca.ExternalRoleAliases) > 0 { + id, ok := roleIDByAlias[ca.ExternalRoleAliases[0]] + Expect(ok).To(BeTrue(), "missing pre-populated external role alias %s", ca.ExternalRoleAliases[0]) + row.ExternalRoleID = &id + } + if ca.Source != "" { + row.Source = lo.ToPtr(ca.Source) + } + Expect(DefaultContext.DB().Create(&row).Error).ToNot(HaveOccurred()) + } + + for _, log := range fixture.PrePopulate.ConfigAccessLogs { + id, ok := userIDByAlias[log.ExternalUserAliases[0]] + Expect(ok).To(BeTrue(), "missing pre-populated external user alias %s", log.ExternalUserAliases[0]) + count := log.Count + row := dutymodels.ConfigAccessLog{ + ConfigID: configIDByExternalID[log.ConfigExternalID], + ExternalUserID: id, + ScraperID: scraperModel.ID, + CreatedAt: now.Add(time.Duration(log.CreatedAtOffsetMins) * time.Minute), + Count: &count, + Properties: log.Properties, + } + if log.MFA != nil { + row.MFA = *log.MFA + } + Expect(DefaultContext.DB().Create(&row).Error).ToNot(HaveOccurred()) } - // Cleanup after test defer func() { + DefaultContext.DB().Exec("DELETE FROM config_access_logs WHERE scraper_id = ?", scraperModel.ID) + DefaultContext.DB().Exec("DELETE FROM config_access WHERE scraper_id = ?", scraperModel.ID) for _, id := range createdItems { DefaultContext.DB().Where("config_id = ?", id).Delete(&models.ConfigChange{}) DefaultContext.DB().Delete(&models.ConfigItem{}, "id = ?", id) } + // Clean up external entities for this scraper + DefaultContext.DB().Exec("DELETE FROM external_user_groups WHERE external_user_id IN (SELECT id FROM external_users WHERE scraper_id = ?)", scraperModel.ID) + DefaultContext.DB().Exec("DELETE FROM external_users WHERE scraper_id = ?", scraperModel.ID) + DefaultContext.DB().Exec("DELETE FROM external_groups WHERE scraper_id = ?", scraperModel.ID) + DefaultContext.DB().Exec("DELETE FROM external_roles WHERE scraper_id = ?", scraperModel.ID) DefaultContext.DB().Where("id = ?", scraperModel.ID).Delete(&dutymodels.ConfigScraper{}) }() - // Run scraper scraperCtx := ctx.WithScrapeConfig(&config) scraperCtx, err = scraperCtx.InitTempCache() Expect(err).ToNot(HaveOccurred()) @@ -114,11 +272,11 @@ var _ = Describe("e2e extraction fixtures", func() { summary, err := db.SaveResults(scraperCtx, results) Expect(err).ToNot(HaveOccurred()) - // Build CEL env by aggregating all results env := buildE2EEnv(results, summary) + env["scraper_id"] = scraperModel.ID.String() for _, expr := range fixture.Assertions { - ok, err := gomplate.RunTemplateBool(env, gomplate.Template{Expression: expr}) + ok, err := DefaultContext.RunTemplateBool(gomplate.Template{Expression: expr}, env) Expect(err).ToNot(HaveOccurred(), "CEL error in %s: %s", name, expr) Expect(ok).To(BeTrue(), "assertion failed in %s: %s\nenv: %v", name, expr, env) } @@ -150,7 +308,6 @@ func buildE2EEnv(results []v1.ScrapeResult, summary v1.ScrapeSummary) map[string "config": nil, } - // Marshal changes to map form for CEL changesRaw, _ := json.Marshal(allChanges) var changesSlice []any _ = json.Unmarshal(changesRaw, &changesSlice) @@ -163,7 +320,6 @@ func buildE2EEnv(results []v1.ScrapeResult, summary v1.ScrapeSummary) map[string env[key] = []any{} } - // Build summary compatible with ExtractionSummary shape used by lightweight fixtures totals := summary.Totals() env["summary"] = map[string]any{ "changes": map[string]any{