diff --git a/.gitignore b/.gitignore index 7a00b69..f0f15bf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # Claude .claude/ CLAUDE.md +CLAUDE.local.md # Test data outputs testdata/out.csv diff --git a/README.md b/README.md index ff8f84c..c0547ef 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ -# vault-csv-normalizer +# vault-csv-count [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) +> **Disclaimer:** This is an unofficial, community-provided tool. It is not +> created, endorsed, or supported by HashiCorp or IBM. Use at your own risk. +> No warranty is provided. For official Vault client counting guidance, refer +> to the [HashiCorp Vault documentation](https://developer.hashicorp.com/vault/docs). + A CLI tool that reads one or more **HashiCorp Vault client export CSV files**, normalizes their data (consistent column names, types, and values across Vault versions), and displays a summary of client counts by mount path and type. @@ -18,7 +23,7 @@ versions), and displays a summary of client counts by mount path and type. 
- Normalizes **namespace paths** (empty/`root` → `[root]`, ensures trailing `/`) - Normalizes **mount paths** (ensures trailing `/`) - Normalizes **timestamps** to UTC across all common Vault timestamp formats -- **Deduplicates** clients across files by `client_id` when `-d` is set, by normalized `entity_alias_name` (`--dedup-alias`), or by alias within explicit auth-method groups (`--dedup-methods ldap,oidc`); alias normalization strips domain suffixes (`@corp.com`) and tier suffixes (`-t0`/`-t1`/`-t2`) +- **Deduplicates** clients within each file by alias within explicit auth-method groups (`--dedup-methods-per-file ldap,oidc`); alias normalization strips domain suffixes (`@corp.com`) - **Filters** by namespace (substring) or client type - **Sorts** by any column - Prints a **summary** with counts broken down by mount path and client type @@ -28,8 +33,6 @@ versions), and displays a summary of client counts by mount path and type. ## Installation ```bash -git clone https://github.com/your-org/vault-csv-normalizer -cd vault-csv-normalizer make build # Binary is at ./bin/vault-csv-normalizer ``` @@ -66,56 +69,51 @@ OPTIONS: Apply a since filter to one specific file only. May be specified multiple times for different files. The filename is matched against the base name (e.g. jan.csv=2024-01-15). - -d Deduplicate records by client_id across all input files. - -dedup-alias - Deduplicate by entity_alias_name within the same identity group across - all input files. LDAP and OIDC are treated as one group (the same - person typically has the same username in both). Two records are - considered the same client if they share the same normalized alias AND - belong to the same identity group, regardless of mount accessor or - source file. Normalization strips the domain suffix (at '@') and any - trailing tier suffix (-t0, -t1, -t2), so "sbishop" (LDAP), "sbishop-t0" - (LDAP, another file), and "sbishop@corp.com" (OIDC) → one client. 
- JWT is a separate group and is not collapsed here; use --dedup-jwt for - JWT vs LDAP/OIDC dedup. - Duplicate groups are printed as a table before the summary. - Records without an alias are always kept. May be combined with -d. - -dedup-methods method1,method2,... - Apply alias deduplication (same normalization as --dedup-alias) but - only for records whose auth method appears in the specified - comma-separated group. Methods in the same group are treated as one - identity — a person authenticating via any of them is counted once. - Records whose auth method is not in any group pass through unchanged. + -dedup-methods-per-file method1,method2,... + Deduplicate by alias for records whose auth method appears in the + specified comma-separated group, scoped to each input file + independently. Records in different files with the same alias are NOT + collapsed — only within-file duplicates are removed. Normalization + strips domain suffixes (at '@') only; tier suffixes (-t0/-t1/-t2) are + kept. Records whose auth method is not in any group pass through + unchanged. The flag is repeatable; each use defines one independent group: - -dedup-methods ldap,oidc - Deduplicate LDAP and OIDC as one identity group. "alice" (LDAP), - "alice@corp.com" (OIDC), and "alice-t0" (LDAP) all normalize to - "alice" and are counted once. JWT records are unaffected. - - -dedup-methods ldap,oidc,jwt - Treat LDAP, OIDC, and JWT together as one group. - - -dedup-methods ldap,oidc -dedup-methods jwt,saml - Two independent groups: {ldap,oidc} and {jwt,saml}. Records in - different groups are never collapsed against each other. - - Duplicate groups are printed as a table before the summary (same - format as --dedup-alias). Records without an alias and PKI clients are - always kept. May be combined with --dedup-alias, --dedup-jwt, and/or -d. - -dedup-jwt - Drop JWT records whose normalized alias matches a non-JWT record across - any input file. 
Uses the same normalization as --dedup-alias (strips - '@domain' and '-t0'/'-t1'/'-t2'). Prevents the same person from being - counted twice when they authenticate via both LDAP/OIDC and JWT. - Records without an alias are always kept. May be combined with - --dedup-alias, --dedup-methods, and/or -d. + -dedup-methods-per-file ldap,oidc + Within each file, collapse LDAP and OIDC records that share the + same alias. "alice" (LDAP) and "alice@corp.com" (OIDC) in the + same file normalize to "alice" and are counted once. A user in + jan.csv and feb.csv is NOT collapsed — counted once per file. + + -dedup-methods-per-file ldap,oidc,jwt + Treat LDAP, OIDC, and JWT as one group within each file. + + -dedup-methods-per-file ldap,oidc -dedup-methods-per-file jwt,saml + Two independent per-file groups. + + Duplicate groups are printed as a table before the summary. Records + without an alias and PKI clients are always kept. + -remove-abandoned-clients + Remove abandoned clients where entity_name and entity_alias_name are + both blank. This includes records with no auth mount (mount_path + empty) and merged/deleted entities (mount_path present). Applied after + all deduplication steps. + -generate-tf + Generate Terraform HCL stubs for entity clients with no alias in the + export. Requires --dedup-methods-per-file. A client is targeted when + entity_alias_name is blank and mount_accessor is non-empty. For each + such client, vault_identity_entity and vault_identity_entity_alias + resources are written to vault-aliases.tf. Mount accessors are emitted + as Terraform variables. Does not affect counts or summary output. -per-file Print a summary for each input file before the combined summary -debug Print all records grouped by mount path, with a full record table under each mount. Records with no mount path are grouped as "(no mount)". 
+ Also prints how many records were removed by + --remove-abandoned-clients when that flag is enabled, split into + no-mount and merged/deleted buckets. -help Show usage information ``` @@ -161,35 +159,23 @@ vault-csv-normalizer -f jan.csv feb.csv --per-file # Debug: show all records grouped by mount path vault-csv-normalizer -f export.csv --debug -# Deduplicate client_ids across files -vault-csv-normalizer -f jan.csv feb.csv -d - -# Deduplicate by entity alias — strips domain (@corp.com) and tier (-t0/-t1/-t2) -# "alice", "alice-t0", "alice-t1", "alice@corp.com" → counted as one client per file -vault-csv-normalizer -f jan.csv feb.csv --dedup-alias - -# Combine both: alias dedup collapses tier/domain variants within each file, -# then -d deduplicates the same client_id appearing across multiple files -vault-csv-normalizer -f jan.csv feb.csv --dedup-alias -d - -# Drop JWT records where the same person already appears via LDAP or OIDC -vault-csv-normalizer -f export.csv --dedup-jwt +# Remove abandoned clients from final totals +vault-csv-normalizer -f export.csv --remove-abandoned-clients -# Full dedup: collapse tiers, dedup client_ids, then drop redundant JWT records -vault-csv-normalizer -f jan.csv feb.csv --dedup-alias -d --dedup-jwt +# Generate Terraform stubs for unaliased LDAP/OIDC clients +vault-csv-normalizer -f export.csv --dedup-methods-per-file ldap,oidc --generate-tf -# Deduplicate LDAP and OIDC as one identity group — same person via either -# method is counted once; other auth methods are unaffected -vault-csv-normalizer -f export.csv --dedup-methods ldap,oidc +# Remove abandoned clients, with debug count output for removed rows +vault-csv-normalizer -f export.csv --remove-abandoned-clients --debug -# Treat LDAP, OIDC, and JWT together as one human-identity group -vault-csv-normalizer -f export.csv --dedup-methods ldap,oidc,jwt +# Within each file, collapse LDAP and OIDC records with the same alias +vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file 
ldap,oidc -# Two independent groups: {ldap,oidc} and {jwt,saml} -vault-csv-normalizer -f export.csv -dedup-methods ldap,oidc --dedup-methods jwt,saml +# Treat LDAP, OIDC, and JWT as one group within each file +vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file ldap,oidc,jwt -# Method-scoped dedup combined with client_id dedup -vault-csv-normalizer -f jan.csv feb.csv --dedup-methods ldap,oidc -d +# Two independent per-file groups: {ldap,oidc} and {jwt,saml} +vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml # Exclude records created before 2024-06-01 vault-csv-normalizer -f export.csv --since 2024-06-01 @@ -241,50 +227,59 @@ PKI Client Summary ## Alias-based deduplication Vault can record the same human as multiple clients when they authenticate via -different auth methods (e.g. LDAP in one session and OIDC in another) or as -tiered accounts (`alice`, `alice-t0`, `alice-t1`). The alias-based dedup flags -collapse these into a single count. +different auth methods (e.g. LDAP in one session and OIDC in another). +`--dedup-methods-per-file` collapses these into a single count within each file. -### Alias normalization +### How deduplication works -All alias-based dedup paths apply the same two-step normalization before -comparing: +Each auth method stores a different value as the entity alias in Vault: -1. **Strip domain suffix** — everything from `@` onward is removed. - `alice@corp.com` → `alice` -2. **Strip tier suffix** — trailing `-t0`, `-t1`, or `-t2` is removed. - `alice-t0` → `alice` +| Auth method | What Vault stores as `entity_alias_name` | +|---|---| +| `ldap` | Bare username: `alice` | +| `oidc` | Bare username (from `entity_alias_metadata.username`): `alice` | +| `jwt` | Full email address: `alice@corp.com` | -So `alice`, `alice-t0`, `alice-t1`, `alice@corp.com`, and `alice-t0@corp.com` -all normalize to `alice` and are treated as the same person. 
+The tool normalizes all three to a common base by stripping the domain suffix +(`alice@corp.com` → `alice`), then matches records within the same file that +share the same normalized alias and belong to the same method group. -### Choosing a dedup flag +**This only works when the same string is used as the identity across all auth +methods.** If `alice` logs in via LDAP as `alice` and via JWT as +`alice@corp.com`, the normalization produces `alice` for both — they collapse. +If the LDAP username and the JWT email prefix do not match (e.g. `asmith` vs +`alice.smith@corp.com`), the records will not be collapsed. -| Flag | What it collapses | What it leaves separate | -|---|---|---| -| `--dedup-alias` | All auth methods, grouped so LDAP=OIDC; each other type is its own group | JWT vs LDAP/OIDC | -| `--dedup-methods ldap,oidc` | Only LDAP and OIDC, as one explicit group | Everything else untouched | -| `--dedup-methods ldap,oidc,jwt` | LDAP, OIDC, and JWT as one group | Everything else untouched | -| `--dedup-jwt` | JWT records that match an existing LDAP/OIDC alias | Non-JWT records | +### Required conditions for cross-method dedup -These flags are independent and can be combined. A common production workflow: +All of the following must be true for two records to be deduplicated: -```bash -# Count human users once, across LDAP and OIDC, then remove JWT duplicates, -# then collapse the same client_id appearing across multiple monthly exports -vault-csv-normalizer -f jan.csv feb.csv mar.csv \ - --dedup-methods ldap,oidc \ - --dedup-jwt \ - -d -``` +1. Both records are in the **same source file** — records across files are never collapsed. +2. Both records' auth methods appear in the **same comma-separated list** passed to `--dedup-methods-per-file`. With `--dedup-methods-per-file ldap,oidc,jwt`, an LDAP and a JWT record can collapse. 
With `--dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml`, an LDAP and a JWT record will never collapse — they are in separate groups. +3. Both records have a **non-empty `entity_alias_name`** (or `entity_alias_metadata.username` for OIDC). +4. The **normalized alias matches** — after stripping the domain suffix, the alias strings are identical. +5. Neither record is a **PKI client** (`client_type=acme` or `mount_accessor` prefix `auth_cert`). + +If any condition is not met, both records pass through unchanged. + +### Alias normalization + +`--dedup-methods-per-file` applies one normalization step before comparing: + +**Strip domain suffix** — everything from `@` onward is removed. +`alice@corp.com` → `alice` + +This lets JWT records (which use full email addresses) match LDAP/OIDC records +(which use bare usernames), provided the local part of the email is the same +as the LDAP/OIDC username. ### Auth methods reference | `mount_type` / `auth_method` | Typical users | Notes | |---|---|---| -| `ldap` | Humans | Aliases usually bare usernames (`alice`) or tiered (`alice-t0`) | -| `oidc` | Humans | Aliases usually `username@domain.com` — normalize to same base as LDAP | -| `jwt` | Humans or services | May share aliases with LDAP/OIDC; use `--dedup-jwt` or `--dedup-methods` | +| `ldap` | Humans | Aliases are bare usernames (`alice`) | +| `oidc` | Humans | Aliases are bare usernames from `entity_alias_metadata.username` (`alice`) | +| `jwt` | Humans or services | Aliases are full email addresses (`alice@corp.com`); domain is stripped to match LDAP/OIDC | | `approle` | Service accounts | Not human; not typically alias-deduped | | `kubernetes` | Service accounts | Not human; not typically alias-deduped | | `aws` / `gcp` | Service accounts | Not human; not typically alias-deduped | @@ -315,7 +310,7 @@ The tool expects CSVs exported from the Vault activity export API | `client_type` | No | Type of client (entity, non-entity, acme, etc.) 
| | `token_creation_time` | No | RFC3339 timestamp of token creation | | `client_first_usage_time`| No | RFC3339 timestamp of first authenticated call | -| `entity_alias_name` | No | Human-readable alias for the entity (used by `--dedup-alias` and `--dedup-methods`; domain and tier suffixes are stripped during normalization) | +| `entity_alias_name` | No | Human-readable alias for the entity (used by `--dedup-methods-per-file`; domain suffix is stripped during normalization) | ### Supported Column Aliases diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go index 1bcdb30..3ae4a37 100644 --- a/cmd/vault-csv-normalizer/main.go +++ b/cmd/vault-csv-normalizer/main.go @@ -12,6 +12,7 @@ import ( "github.com/vault-csv-normalizer/internal/normalizer" "github.com/vault-csv-normalizer/internal/parser" "github.com/vault-csv-normalizer/internal/renderer" + "github.com/vault-csv-normalizer/internal/tfgen" ) // multiFlag allows a flag to be specified multiple times. @@ -39,16 +40,15 @@ func (f fileDateFlag) Set(v string) error { func main() { var inputFiles multiFlag - var dedupMethods multiFlag + var dedupMethodsPerFile multiFlag var sortBy string var filterNS string var filterType string var filterSince string var filterSinceFile = make(fileDateFlag) var countPKI bool - var dedup bool - var dedupAlias bool - var dedupJWT bool + var removeAbandonedClients bool + var generateTF bool var listMethods bool var debugMode bool var perFile bool @@ -61,11 +61,10 @@ func main() { flag.StringVar(&filterSince, "since", "", "Exclude records with a token_creation_time before this value (e.g. 2024-01-01 or 2024-01-01T00:00:00Z)") flag.Var(&filterSinceFile, "since-file", "Apply a since filter to one file only: filename=date. 
May be specified multiple times for different files.") flag.BoolVar(&countPKI, "p", false, "Partition and report PKI/cert clients (client_type=acme or mount_accessor prefix auth_cert) separately") - flag.BoolVar(&dedup, "d", false, "Deduplicate records by client_id across all input files") - flag.BoolVar(&dedupAlias, "dedup-alias", false, "Deduplicate by entity_alias_name (strips domain and -t0/-t1/-t2 tier suffixes; records without an alias are always kept; may be combined with -d)") - flag.Var(&dedupMethods, "dedup-methods", "Deduplicate by alias for the specified comma-separated auth methods, treating them as one identity group. Repeatable to define multiple groups (e.g. -dedup-methods ldap,oidc -dedup-methods jwt,saml).") - flag.BoolVar(&dedupJWT, "dedup-jwt", false, "Drop JWT records whose normalized alias matches a non-JWT record in the same file (prevents counting the same person via both LDAP/OIDC and JWT)") - flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods groups.") + flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Deduplicate by alias for the specified comma-separated auth methods, scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.") + flag.BoolVar(&removeAbandonedClients, "remove-abandoned-clients", false, "Remove abandoned clients (blank entity_name and entity_alias_name) after deduplication. Includes records with no auth mount and merged/deleted entities.") + flag.BoolVar(&generateTF, "generate-tf", false, "Generate Terraform HCL stubs for entity clients with no alias. Requires --dedup-methods-per-file. Output written to vault-aliases.tf.") + flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. 
Useful for deciding --dedup-methods-per-file groups.") flag.BoolVar(&debugMode, "debug", false, "Print all records grouped by mount path") flag.BoolVar(&perFile, "per-file", false, "Print a summary for each input file before the combined summary") flag.BoolVar(&showHelp, "help", false, "Show usage information") @@ -115,10 +114,8 @@ func main() { os.Exit(0) } - // Parse --dedup-methods values into groups. Each flag value is a - // comma-separated list of mount types that form one identity group. - var methodGroups [][]string - for _, val := range dedupMethods { + var methodGroupsPerFile [][]string + for _, val := range dedupMethodsPerFile { var group []string for _, m := range strings.Split(val, ",") { m = strings.TrimSpace(strings.ToLower(m)) @@ -127,70 +124,38 @@ func main() { } } if len(group) > 0 { - methodGroups = append(methodGroups, group) + methodGroupsPerFile = append(methodGroupsPerFile, group) } } - // Snapshot pre-dedup records so debug mode can show alias groups from the - // original data regardless of which dedup flags are active. 
preDedup := normalized - if dedupAlias { - groups := normalizer.FindAliasDuplicates(preDedup) - if len(groups) > 0 { - fmt.Fprintf(os.Stdout, "Alias duplicates found (%d group(s))\n", len(groups)) - fmt.Fprintln(os.Stdout, "=====================================") - for _, group := range groups { + var aliasGroups [][]normalizer.Record + if len(methodGroupsPerFile) > 0 { + aliasGroups = normalizer.FindAliasDuplicatesForMethodsPerFile(preDedup, methodGroupsPerFile) + if len(aliasGroups) > 0 { + fmt.Fprintf(os.Stdout, "Per-file method-scoped alias duplicates found (%d group(s))\n", len(aliasGroups)) + fmt.Fprintln(os.Stdout, "=====================================================") + for _, group := range aliasGroups { r0 := group[0] fmt.Fprintf(os.Stdout, "\nAlias group: %q file: %s\n", - normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source)) + normalizer.BaseAlias(r0.EntityAliasName), filepath.Base(r0.Source)) renderer.PrintTable(os.Stdout, group) } fmt.Fprintln(os.Stdout) } - normalized = normalizer.DeduplicateByAlias(normalized) - } - if len(methodGroups) > 0 { - groups := normalizer.FindAliasDuplicatesForMethods(preDedup, methodGroups) - if len(groups) > 0 { - fmt.Fprintf(os.Stdout, "Method-scoped alias duplicates found (%d group(s))\n", len(groups)) - fmt.Fprintln(os.Stdout, "================================================") - for _, group := range groups { - r0 := group[0] - fmt.Fprintf(os.Stdout, "\nAlias group: %q file: %s\n", - normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source)) - renderer.PrintTable(os.Stdout, group) - } - fmt.Fprintln(os.Stdout) - } - normalized = normalizer.DeduplicateByAliasForMethods(normalized, methodGroups) + normalized = normalizer.DeduplicateByAliasForMethodsPerFile(normalized, methodGroupsPerFile) } - // Collect -d dedup statistics before running so debug mode can report - // exactly which client_ids were (or weren't) collapsed. 
- var clientIDDupsBefore int - var clientIDDupsAfter int - var clientIDDupMap map[string]int // client_id → count of input records - if dedup && debugMode { - clientIDDupsBefore = len(normalized) - idCount := make(map[string]int, len(normalized)) - for _, r := range normalized { - idCount[r.ClientID]++ - } - clientIDDupMap = make(map[string]int) - for id, n := range idCount { - if n > 1 { - clientIDDupMap[id] = n - } - } - } - if dedup { - normalized = normalizer.Deduplicate(normalized) - if debugMode { - clientIDDupsAfter = len(normalized) - } - } - if dedupJWT { - normalized = normalizer.DeduplicateJWT(normalized) + removedAbandonedCounts := normalizer.AbandonedClientCounts{} + if removeAbandonedClients { + normalized, removedAbandonedCounts = normalizer.FilterAbandonedClients(normalized) + + fmt.Fprintf(os.Stdout, "Removed abandoned clients (total): %d\n", removedAbandonedCounts.Total()) + fmt.Fprintf(os.Stdout, " no auth mount (mount path empty): %d (PKI: %d, non-PKI: %d)\n", + removedAbandonedCounts.NoMount, removedAbandonedCounts.NoMountPKI, removedAbandonedCounts.NoMount-removedAbandonedCounts.NoMountPKI) + fmt.Fprintf(os.Stdout, " merged/deleted (mount path present): %d (PKI: %d, non-PKI: %d)\n", + removedAbandonedCounts.MergedDeleted, removedAbandonedCounts.MergedDeletedPKI, removedAbandonedCounts.MergedDeleted-removedAbandonedCounts.MergedDeletedPKI) + fmt.Fprintln(os.Stdout, strings.Repeat("-", 70)) } // Apply filters. @@ -215,47 +180,24 @@ func main() { os.Exit(1) } - if debugMode { - // Show -d dedup results so the user can see which client_ids were (or - // weren't) collapsed, and understand why records still appear after dedup. 
- if dedup { - collapsed := clientIDDupsBefore - clientIDDupsAfter - fmt.Fprintf(os.Stdout, "Debug: -d client_id dedup — before: %d after: %d collapsed: %d\n", - clientIDDupsBefore, clientIDDupsAfter, collapsed) - fmt.Fprintln(os.Stdout, strings.Repeat("-", 70)) - if len(clientIDDupMap) > 0 { - dupIDs := make([]string, 0, len(clientIDDupMap)) - for id := range clientIDDupMap { - dupIDs = append(dupIDs, id) - } - sort.Strings(dupIDs) - for _, id := range dupIDs { - fmt.Fprintf(os.Stdout, " %s (x%d → kept 1)\n", id, clientIDDupMap[id]) - } - } else { - fmt.Fprintln(os.Stdout, " (no duplicate client_ids found)") + if generateTF { + if len(methodGroupsPerFile) == 0 { + fmt.Fprintln(os.Stderr, "warning: --generate-tf has no effect without --dedup-methods-per-file") + } else { + n, err := tfgen.GenerateTF(aliasGroups, "vault-aliases.tf") + if err != nil { + fmt.Fprintf(os.Stderr, "error: --generate-tf: %v\n", err) + os.Exit(1) } - fmt.Fprintln(os.Stdout) - } - - // Show alias groups from the original (pre-dedup) data so the user can - // see aliasing context regardless of which dedup flags are active. - // Skip when -dedup-alias is set because it already printed these above. 
- if !dedupAlias { - groups := normalizer.FindAliasDuplicates(preDedup) - if len(groups) > 0 { - fmt.Fprintf(os.Stdout, "Debug: alias groups in input data (%d group(s))\n", len(groups)) - fmt.Fprintln(os.Stdout, "===============================================") - for _, group := range groups { - r0 := group[0] - fmt.Fprintf(os.Stdout, "\nAlias group: %q file: %s\n", - normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source)) - renderer.PrintTable(os.Stdout, group) - } - fmt.Fprintln(os.Stdout) + if n == 0 { + fmt.Fprintln(os.Stdout, "generate-tf: no alias groups found — nothing to generate") + } else { + fmt.Fprintf(os.Stdout, "generate-tf: wrote %d entity stub(s) to vault-aliases.tf\n", n) } } + } + if debugMode { // Group final (post-dedup) records by mount path. var mountOrder []string byMount := make(map[string][]normalizer.Record) @@ -276,21 +218,21 @@ func main() { fmt.Fprintf(os.Stdout, "\nMount: %s (%d record(s))\n", mp, len(group)) renderer.PrintTable(os.Stdout, group) // Flag records within this mount that share an entity alias but have - // different client_ids — these are candidates for -dedup-alias. + // different client_ids — use --dedup-methods-per-file to collapse them. if len(group) > 1 { aliasToIDs := make(map[string][]string) for _, r := range group { if r.EntityAliasName == "" { continue } - norm := normalizer.StripTierSuffix(normalizer.BaseAlias(r.EntityAliasName)) + norm := normalizer.BaseAlias(r.EntityAliasName) aliasToIDs[norm] = append(aliasToIDs[norm], r.ClientID) } for alias, ids := range aliasToIDs { if len(ids) < 2 { continue } - // Check that not all client_ids are the same (already handled by -d). + // Check that not all client_ids are the same. allSame := true for _, id := range ids[1:] { if id != ids[0] { @@ -299,7 +241,7 @@ func main() { } } if !allSame { - fmt.Fprintf(os.Stdout, " !! 
alias %q has %d records with different client_ids — use -dedup-alias to collapse\n", alias, len(ids)) + fmt.Fprintf(os.Stdout, " !! alias %q has %d records with different client_ids — use --dedup-methods-per-file to collapse\n", alias, len(ids)) } } } @@ -307,7 +249,7 @@ func main() { fmt.Fprintln(os.Stdout) } - if perFile { + if (perFile || len(methodGroupsPerFile) > 0) && len(inputFiles) > 1 { bySource := make(map[string][]normalizer.Record, len(inputFiles)) for _, r := range normalized { bySource[r.Source] = append(bySource[r.Source], r) @@ -372,8 +314,8 @@ func printMethodList(records []normalizer.Record, files []string) { fmt.Fprintf(os.Stdout, " %-20s %8d %10d\n", mt, s.total, s.withAlias) } fmt.Fprintln(os.Stdout) - fmt.Fprintln(os.Stdout, "Tip: use --dedup-methods to group methods into human/machine identity sets.") - fmt.Fprintln(os.Stdout, " Example: --dedup-methods ldap,oidc,jwt --dedup-methods approle,kubernetes") + fmt.Fprintln(os.Stdout, "Tip: use --dedup-methods-per-file to group methods into human/machine identity sets.") + fmt.Fprintln(os.Stdout, " Example: --dedup-methods-per-file ldap,oidc,jwt --dedup-methods-per-file approle,kubernetes") } func printUsage() { @@ -414,6 +356,9 @@ EXAMPLES: # Per-file since filters on multiple files vault-csv-normalizer -f jan.csv feb.csv --since-file jan.csv=2024-01-15 --since-file feb.csv=2024-02-01 + # Remove abandoned clients (blank entity fields) + vault-csv-normalizer -f export.csv --remove-abandoned-clients + CSV FORMAT (Vault activity export): Expected columns (order-independent, case-insensitive): client_id, namespace_id, namespace_path, mount_accessor, mount_path, @@ -428,43 +373,31 @@ CSV FORMAT (Vault activity export): Optional column: entity_alias_name (also accepted as: alias_name, entity_alias) - When present, --dedup-alias collapses records that share the same - normalized alias within the same identity group across all input files. - LDAP and OIDC are treated as one group. 
Normalization strips the domain - suffix (at '@') and any trailing tier suffix (-t0, -t1, -t2). - "sbishop" (LDAP, jan.csv), "sbishop-t0" (LDAP, feb.csv), and - "sbishop@corp.com" (OIDC) → one client. JWT is a separate group; - use --dedup-jwt to additionally collapse JWT against LDAP/OIDC. - - --dedup-jwt uses the same normalization to match JWT records against - non-JWT records in the same file. A JWT record is dropped if a non-JWT - record (e.g. LDAP or OIDC) shares the same normalized alias, preventing - the same person from being counted twice when they authenticate via both - methods. Can be combined with --dedup-alias and/or -d. - - --dedup-methods - Apply alias deduplication (same normalization as --dedup-alias) but only - for records whose auth method appears in the specified comma-separated - group. Methods in the same group are treated as one identity — a person - authenticating via any of them is counted once. Records whose auth method - is not in any group pass through unchanged. + When present, --dedup-methods-per-file collapses records that share + the same normalized alias within each source file. Normalization strips + the domain suffix (at '@'). Tier suffixes (-t0, -t1, -t2) are NOT + stripped — "sbishop-t0" and "sbishop-t1" are treated as distinct + identities within a file. + + --dedup-methods-per-file + Deduplicate by alias for records whose auth method appears in the + specified comma-separated group, scoped to each input file + independently. Records in different files with the same normalized + alias are NOT collapsed — only within-file duplicates are removed. + Useful when files represent different billing periods and you want to + count a returning user once per file rather than once globally. + + Normalization strips domain suffixes (at '@') only — tier suffixes + like -t0/-t1 are kept, so "alice-t0" and "alice-t1" are distinct. 
The flag is repeatable; each use defines one independent group: - --dedup-methods ldap,oidc - Deduplicate LDAP and OIDC as one identity group. "alice" (LDAP), - "alice@corp.com" (OIDC), and "alice-t0" (LDAP) all normalize to - "alice" and are counted once. - - --dedup-methods ldap,oidc,jwt - Treat LDAP, OIDC, and JWT together as one group. - - --dedup-methods ldap,oidc --dedup-methods jwt,saml - Two independent groups: {ldap,oidc} and {jwt,saml}. A person - appearing in both LDAP and OIDC is counted once; a person - appearing in both JWT and SAML is counted once; but an LDAP - record and a JWT record for the same person are not collapsed - (unless both groups are merged into one). + --dedup-methods-per-file ldap,oidc + Within each file, collapse LDAP and OIDC records that share the + same alias. A user in jan.csv (LDAP) and feb.csv (OIDC) is NOT + collapsed — they appear once per file. - Can be combined with --dedup-alias, --dedup-jwt, and/or -d.`) + --dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml + Two independent per-file groups. Records in different groups are + never collapsed against each other.`) } diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go index 5b3e543..65ecc7d 100644 --- a/internal/normalizer/normalizer.go +++ b/internal/normalizer/normalizer.go @@ -14,30 +14,32 @@ import ( // Record is a fully normalized Vault client record. 
type Record struct { - Source string - ClientID string - NamespaceID string - NamespacePath string - MountAccessor string - MountPath string - MountType string - AuthMethod string - ClientType string // normalized: entity | non-entity | acme | secret-sync | unknown - TokenCreationTime time.Time - ClientFirstUsageTime time.Time - EntityAliasName string + Source string + ClientID string + EntityName string + NamespaceID string + NamespacePath string + MountAccessor string + MountPath string + MountType string + AuthMethod string + ClientType string // normalized: entity | non-entity | acme | secret-sync | unknown + TokenCreationTime time.Time + ClientFirstUsageTime time.Time + EntityAliasName string + EntityAliasMetadataUsername string } // supportedSortKeys lists columns accepted by Sort. var supportedSortKeys = map[string]bool{ - "namespace_path": true, - "client_type": true, - "token_creation_time": true, + "namespace_path": true, + "client_type": true, + "token_creation_time": true, "client_first_usage_time": true, - "mount_accessor": true, - "mount_path": true, - "auth_method": true, - "source": true, + "mount_accessor": true, + "mount_path": true, + "auth_method": true, + "source": true, } // Normalize converts a slice of raw records into normalized records. 
@@ -51,18 +53,20 @@ func Normalize(raw []parser.RawRecord) []Record { func normalizeOne(r parser.RawRecord) Record { return Record{ - Source: r.Source, - ClientID: r.ClientID, - NamespaceID: normalizeNamespaceID(r.NamespaceID), - NamespacePath: normalizeNamespacePath(r.NamespacePath), - MountAccessor: strings.TrimSpace(r.MountAccessor), - MountPath: normalizeMountPath(r.MountPath), - MountType: strings.ToLower(strings.TrimSpace(r.MountType)), - AuthMethod: strings.ToLower(strings.TrimSpace(r.AuthMethod)), - ClientType: normalizeClientType(r.ClientType), - TokenCreationTime: ParseTime(r.TokenCreationTime), - ClientFirstUsageTime: ParseTime(r.ClientFirstUsageTime), - EntityAliasName: strings.TrimSpace(r.EntityAliasName), + Source: r.Source, + ClientID: r.ClientID, + EntityName: strings.TrimSpace(r.EntityName), + NamespaceID: normalizeNamespaceID(r.NamespaceID), + NamespacePath: normalizeNamespacePath(r.NamespacePath), + MountAccessor: strings.TrimSpace(r.MountAccessor), + MountPath: normalizeMountPath(r.MountPath), + MountType: strings.ToLower(strings.TrimSpace(r.MountType)), + AuthMethod: strings.ToLower(strings.TrimSpace(r.AuthMethod)), + ClientType: normalizeClientType(r.ClientType), + TokenCreationTime: ParseTime(r.TokenCreationTime), + ClientFirstUsageTime: ParseTime(r.ClientFirstUsageTime), + EntityAliasName: strings.TrimSpace(r.EntityAliasName), + EntityAliasMetadataUsername: strings.TrimSpace(r.EntityAliasMetadataUsername), } } @@ -101,20 +105,20 @@ func normalizeMountPath(path string) string { // clientTypeAliases maps various raw strings to a canonical client type. 
var clientTypeAliases = map[string]string{ - "entity": "entity", - "entity client": "entity", - "non-entity": "non-entity", - "non_entity": "non-entity", - "non-entity client": "non-entity", - "non_entity_client": "non-entity", - "nonentity": "non-entity", - "acme": "acme", - "acme client": "acme", - "secret-sync": "secret-sync", - "secret_sync": "secret-sync", - "secretsync": "secret-sync", - "secrets sync": "secret-sync", - "secret sync": "secret-sync", + "entity": "entity", + "entity client": "entity", + "non-entity": "non-entity", + "non_entity": "non-entity", + "non-entity client": "non-entity", + "non_entity_client": "non-entity", + "nonentity": "non-entity", + "acme": "acme", + "acme client": "acme", + "secret-sync": "secret-sync", + "secret_sync": "secret-sync", + "secretsync": "secret-sync", + "secrets sync": "secret-sync", + "secret sync": "secret-sync", } func normalizeClientType(raw string) string { @@ -156,31 +160,9 @@ func ParseTime(raw string) time.Time { return time.Time{} // unparseable → zero value } -// Deduplicate removes records with duplicate ClientIDs. When duplicates exist, -// the record with a non-empty MountPath is preferred over one with an empty -// MountPath; otherwise the first occurrence is kept. -func Deduplicate(records []Record) []Record { - index := make(map[string]int, len(records)) // client_id → position in out - out := make([]Record, 0, len(records)) - for _, r := range records { - i, seen := index[r.ClientID] - if !seen { - index[r.ClientID] = len(out) - out = append(out, r) - continue - } - // Upgrade an empty-mount record if we now have a real mount path. - if out[i].MountPath == "" && r.MountPath != "" { - out[i] = r - } - } - return out -} - // BaseAlias returns the portion of an entity alias name before the first '@' // character. If no '@' is present the full name is returned. -// Example: "alice@corp.com" → "alice", "sbishop@hashicorp.com" → "sbishop", -// "sbishop-t0" → "sbishop-t0". 
+// Example: "alice@corp.com" → "alice", "sbishop@hashicorp.com" → "sbishop". func BaseAlias(name string) string { for i, ch := range name { if ch == '@' { @@ -190,108 +172,6 @@ func BaseAlias(name string) string { return name } -// StripTierSuffix removes a trailing "-t0", "-t1", or "-t2" suffix from name. -// Other suffixes are left unchanged. -// Example: "alice-t0" → "alice", "bob-t2" → "bob", "carol-t3" → "carol-t3". -func StripTierSuffix(name string) string { - n := len(name) - if n >= 3 && name[n-3] == '-' && name[n-2] == 't' && name[n-1] >= '0' && name[n-1] <= '2' { - return name[:n-3] - } - return name -} - -// aliasKey is the deduplication key for alias-based dedup: one record is -// allowed per (normalized alias, mount type) pair across all input files. -// Including the mount type prevents --dedup-alias from collapsing records -// across different auth methods (e.g. LDAP vs JWT); use --dedup-jwt for that. -type aliasKey struct { - base string - mountType string -} - -// dedupMountGroup maps mount types that represent the same identity provider -// to a single canonical value. OIDC and LDAP are treated as one group because -// the same person typically has the same username in both systems. -func dedupMountGroup(mt string) string { - if mt == "oidc" { - return "ldap" - } - return mt -} - -// aliasKeyFor computes the dedup key for a record. It strips the domain suffix -// (at '@') and any trailing tier suffix ("-t0"/"-t1"/"-t2"), and scopes the -// key to the mount group so that only records of the same identity type -// collapse. OIDC and LDAP share a group; JWT remains separate (use -// --dedup-jwt for JWT vs LDAP/OIDC dedup). 
-func aliasKeyFor(r Record) aliasKey { - mt := r.MountType - if mt == "" { - mt = r.AuthMethod - } - return aliasKey{ - base: StripTierSuffix(BaseAlias(r.EntityAliasName)), - mountType: dedupMountGroup(mt), - } -} - -// FindAliasDuplicates groups records by (BaseAlias, source file) and returns -// every group that contains more than one record. Records with a blank -// EntityAliasName or that are PKI clients are ignored. Groups are returned in -// the order the first member of each group appeared in records. -func FindAliasDuplicates(records []Record) [][]Record { - type entry struct { - key aliasKey - members []Record - } - index := make(map[aliasKey]int) - var entries []entry - - for _, r := range records { - if r.EntityAliasName == "" || IsPKIClient(r) { - continue - } - k := aliasKeyFor(r) - if idx, ok := index[k]; ok { - entries[idx].members = append(entries[idx].members, r) - } else { - index[k] = len(entries) - entries = append(entries, entry{key: k, members: []Record{r}}) - } - } - - var out [][]Record - for _, e := range entries { - if len(e.members) > 1 { - out = append(out, e.members) - } - } - return out -} - -// DeduplicateByAlias keeps at most one record per (BaseAlias, source file) -// combination. The same user authenticating via multiple mount accessors in -// the same file is collapsed to one record. Records with a blank -// EntityAliasName or that are PKI clients are always kept. 
-func DeduplicateByAlias(records []Record) []Record { - seen := make(map[aliasKey]struct{}, len(records)) - out := make([]Record, 0, len(records)) - for _, r := range records { - if r.EntityAliasName == "" || IsPKIClient(r) { - out = append(out, r) - continue - } - k := aliasKeyFor(r) - if _, dup := seen[k]; dup { - continue - } - seen[k] = struct{}{} - out = append(out, r) - } - return out -} - // buildMethodGroupMap converts a list of groups (each a slice of mount-type // strings) into a map from every member to the group's canonical value (the // first element of the group). Methods not present in any group are absent @@ -310,54 +190,79 @@ func buildMethodGroupMap(groups [][]string) map[string]string { return m } -// aliasKeyForMethods computes the dedup key for a record using a caller-supplied -// group map (from buildMethodGroupMap). If the record's mount type is not in the -// map the second return value is false, meaning the record should not participate -// in method-scoped dedup. -func aliasKeyForMethods(r Record, groupMap map[string]string) (aliasKey, bool) { +// aliasKeyInFile is the deduplication key for per-file alias dedup. It includes +// the source file so records from different files are never collapsed together. +type aliasKeyInFile struct { + base string + mountType string + source string +} + +// isOIDC reports whether r was authenticated via OIDC. +func isOIDC(r Record) bool { + return r.MountType == "oidc" || r.AuthMethod == "oidc" +} + +// effectiveAliasInFile returns the alias to use for per-file dedup. For OIDC +// records, entity_alias_metadata.username holds the human-readable username; +// entity_alias_name may be a subject identifier (UUID or email) that doesn't +// match other methods. All other methods use entity_alias_name directly. 
+func effectiveAliasInFile(r Record) string { + if isOIDC(r) && r.EntityAliasMetadataUsername != "" { + return r.EntityAliasMetadataUsername + } + return r.EntityAliasName +} + +// aliasKeyInFileFor computes the per-file dedup key for a record. It applies +// BaseAlias (strips everything after '@' if present) but not StripTierSuffix, +// so "alice-t0" and "alice-t1" are treated as distinct identities. The '@' +// strip is needed for JWT, which uses full email addresses ("alice@corp.com"); +// LDAP uses bare usernames ("alice"); OIDC uses entity_alias_metadata.username. +// Returns false if the record's mount type is not in any provided group. +func aliasKeyInFileFor(r Record, groupMap map[string]string) (aliasKeyInFile, bool) { mt := r.MountType if mt == "" { mt = r.AuthMethod } canonical, ok := groupMap[mt] if !ok { - return aliasKey{}, false + return aliasKeyInFile{}, false } - return aliasKey{ - base: StripTierSuffix(BaseAlias(r.EntityAliasName)), + return aliasKeyInFile{ + base: BaseAlias(effectiveAliasInFile(r)), mountType: canonical, + source: r.Source, }, true } -// FindAliasDuplicatesForMethods is the same as FindAliasDuplicates but only -// considers records whose auth method (MountType or AuthMethod) appears in one -// of the provided groups. Each group is a slice of mount-type strings that -// should be treated as the same identity (e.g. ["ldap","oidc"]). Records whose -// method is not in any group are not reported. Groups are independent: records -// in different groups are never compared against each other. -func FindAliasDuplicatesForMethods(records []Record, groups [][]string) [][]Record { +// FindAliasDuplicatesForMethodsPerFile groups records by normalized alias within +// each source file. Records in different files with the same alias are not +// reported as duplicates. Matching uses only the portion of the alias left of +// '@'; tier suffixes (-t0/-t1/-t2) are not stripped and must match exactly. 
+func FindAliasDuplicatesForMethodsPerFile(records []Record, groups [][]string) [][]Record { groupMap := buildMethodGroupMap(groups) type entry struct { - key aliasKey + key aliasKeyInFile members []Record } - index := make(map[aliasKey]int) + index := make(map[aliasKeyInFile]int) var entries []entry for _, r := range records { - if r.EntityAliasName == "" || IsPKIClient(r) { + if effectiveAliasInFile(r) == "" || IsPKIClient(r) { continue } - k, ok := aliasKeyForMethods(r, groupMap) + kf, ok := aliasKeyInFileFor(r, groupMap) if !ok { continue } - if idx, exists := index[k]; exists { + if idx, exists := index[kf]; exists { entries[idx].members = append(entries[idx].members, r) } else { - index[k] = len(entries) - entries = append(entries, entry{key: k, members: []Record{r}}) + index[kf] = len(entries) + entries = append(entries, entry{key: kf, members: []Record{r}}) } } @@ -370,67 +275,31 @@ func FindAliasDuplicatesForMethods(records []Record, groups [][]string) [][]Reco return out } -// DeduplicateByAliasForMethods applies the same alias dedup logic as -// DeduplicateByAlias but only for records whose auth method appears in one of -// the provided groups. Each group is a slice of mount-type strings treated as -// one identity (e.g. ["ldap","oidc"]). Records whose method is not in any group -// pass through unchanged. Records with a blank EntityAliasName or that are PKI -// clients are always kept. -func DeduplicateByAliasForMethods(records []Record, groups [][]string) []Record { +// DeduplicateByAliasForMethodsPerFile deduplicates by alias scoped to each +// source file independently. Records in different files are never collapsed; +// only records from the same +// file with the same normalized alias and method group are deduplicated. +// Matching uses only the portion of the alias left of '@'; tier suffixes +// (-t0/-t1/-t2) are not stripped and must match exactly. +// Records with a blank effective alias (see effectiveAliasInFile) or that are PKI clients are always kept.
+func DeduplicateByAliasForMethodsPerFile(records []Record, groups [][]string) []Record { groupMap := buildMethodGroupMap(groups) - seen := make(map[aliasKey]struct{}, len(records)) + seen := make(map[aliasKeyInFile]struct{}, len(records)) out := make([]Record, 0, len(records)) for _, r := range records { - if r.EntityAliasName == "" || IsPKIClient(r) { + if effectiveAliasInFile(r) == "" || IsPKIClient(r) { out = append(out, r) continue } - k, ok := aliasKeyForMethods(r, groupMap) + kf, ok := aliasKeyInFileFor(r, groupMap) if !ok { - // Method not in any group — pass through untouched. out = append(out, r) continue } - if _, dup := seen[k]; dup { - continue - } - seen[k] = struct{}{} - out = append(out, r) - } - return out -} - -// isJWT reports whether r was authenticated via JWT. -func isJWT(r Record) bool { - return r.MountType == "jwt" || r.AuthMethod == "jwt" -} - -// DeduplicateJWT drops JWT records whose normalized alias (StripTierSuffix + -// BaseAlias) matches a non-JWT record's normalized alias in the same source -// file. This prevents the same person from being counted once for their LDAP -// or OIDC identity and again for their JWT identity. Records without an alias -// are always kept. -func DeduplicateJWT(records []Record) []Record { - // Build global set of normalized aliases from all non-JWT records. 
- nonJWTAliases := make(map[string]struct{}) - for _, r := range records { - if isJWT(r) || r.EntityAliasName == "" { + if _, dup := seen[kf]; dup { continue } - norm := StripTierSuffix(BaseAlias(r.EntityAliasName)) - if norm != "" { - nonJWTAliases[norm] = struct{}{} - } - } - - out := make([]Record, 0, len(records)) - for _, r := range records { - if isJWT(r) && r.EntityAliasName != "" { - norm := StripTierSuffix(BaseAlias(r.EntityAliasName)) - if _, match := nonJWTAliases[norm]; match { - continue - } - } + seen[kf] = struct{}{} out = append(out, r) } return out @@ -518,6 +387,50 @@ func FilterByClientType(records []Record, clientType string) []Record { return out } +// AbandonedClientCounts reports how many anonymous records were removed by +// FilterAbandonedClients, split by whether an auth mount is present and +// whether the record is a PKI client. +type AbandonedClientCounts struct { + NoMount int + NoMountPKI int + MergedDeleted int + MergedDeletedPKI int +} + +// Total returns the sum of removed abandoned-client records. 
+func (c AbandonedClientCounts) Total() int { + return c.NoMount + c.MergedDeleted +} + +// FilterAbandonedClients removes entity-type records with no entity identity +// (client_type is "entity" and both entity_name and entity_alias_name are +// blank) and reports separate counts for two cases: +// - NoMount: mount_path is blank (auth mount no longer exists) +// - MergedDeleted: mount_path is present (entity was likely merged/deleted) +func FilterAbandonedClients(records []Record) ([]Record, AbandonedClientCounts) { + out := make([]Record, 0, len(records)) + counts := AbandonedClientCounts{} + for _, r := range records { + if r.EntityName == "" && r.EntityAliasName == "" && r.ClientType == "entity" { + pki := IsPKIClient(r) + if r.MountPath == "" { + counts.NoMount++ + if pki { + counts.NoMountPKI++ + } + continue + } + counts.MergedDeleted++ + if pki { + counts.MergedDeletedPKI++ + } + continue + } + out = append(out, r) + } + return out, counts +} + // Sort sorts records in-place by the given column key. Returns an error if // the key is not recognized.
func Sort(records []Record, by string) error { diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go index 68adaa1..c70ee53 100644 --- a/internal/normalizer/normalizer_test.go +++ b/internal/normalizer/normalizer_test.go @@ -73,15 +73,16 @@ func TestParseTime(t *testing.T) { func TestNormalize(t *testing.T) { raw := []parser.RawRecord{ { - Source: "jan.csv", - ClientID: "abc-123", - NamespaceID: "", - NamespacePath: "root", - MountPath: "auth/approle", - MountType: "APPROLE", - AuthMethod: "AppRole", - ClientType: "non_entity", - TokenCreationTime: "2024-01-01T00:00:00Z", + Source: "jan.csv", + ClientID: "abc-123", + EntityName: " Alice Smith ", + NamespaceID: "", + NamespacePath: "root", + MountPath: "auth/approle", + MountType: "APPROLE", + AuthMethod: "AppRole", + ClientType: "non_entity", + TokenCreationTime: "2024-01-01T00:00:00Z", }, } records := Normalize(raw) @@ -89,6 +90,9 @@ func TestNormalize(t *testing.T) { t.Fatalf("expected 1 record, got %d", len(records)) } r := records[0] + if r.EntityName != "Alice Smith" { + t.Errorf("EntityName: got %q, want Alice Smith", r.EntityName) + } if r.NamespacePath != "[root]" { t.Errorf("NamespacePath: got %q, want [root]", r.NamespacePath) } @@ -139,37 +143,6 @@ func TestFilterByClientType(t *testing.T) { } } -func TestDeduplicate_PrefersNonEmptyMount(t *testing.T) { - records := []Record{ - {ClientID: "abc", MountPath: ""}, - {ClientID: "abc", MountPath: "auth/ldap/"}, - {ClientID: "xyz", MountPath: "auth/approle/"}, - {ClientID: "xyz", MountPath: ""}, - } - out := Deduplicate(records) - if len(out) != 2 { - t.Fatalf("expected 2 records after dedup, got %d", len(out)) - } - for _, r := range out { - if r.MountPath == "" { - t.Errorf("client %q kept empty-mount record when a non-empty mount was available", r.ClientID) - } - } -} - -func TestDeduplicate_KeepsFirstWhenBothEmpty(t *testing.T) { - records := []Record{ - {ClientID: "abc", MountPath: "", AuthMethod: "first"}, - 
{ClientID: "abc", MountPath: "", AuthMethod: "second"}, - } - out := Deduplicate(records) - if len(out) != 1 { - t.Fatalf("expected 1 record, got %d", len(out)) - } - if out[0].AuthMethod != "first" { - t.Errorf("expected first occurrence to be kept, got AuthMethod=%q", out[0].AuthMethod) - } -} func TestFilterSince(t *testing.T) { records := []Record{ @@ -224,7 +197,7 @@ func TestBaseAlias(t *testing.T) { {"alice@corp.com", "alice"}, {"sbishop@hashicorp.com", "sbishop"}, {"abc@234", "abc"}, - {"sbishop-t0", "sbishop-t0"}, // BaseAlias alone does not strip tier + {"sbishop-t0", "sbishop-t0"}, {"plain", "plain"}, {"", ""}, {"@leading", ""}, @@ -237,160 +210,6 @@ func TestBaseAlias(t *testing.T) { } } -func TestStripTierSuffix(t *testing.T) { - cases := []struct{ in, want string }{ - {"alice-t0", "alice"}, - {"alice-t1", "alice"}, - {"alice-t2", "alice"}, - {"alice-t3", "alice-t3"}, // only t0–t2 are stripped - {"alice-t10", "alice-t10"}, - {"alice-T0", "alice-T0"}, // case-sensitive - {"alice", "alice"}, - {"-t0", ""}, // degenerate: only the suffix - {"t0", "t0"}, // no hyphen - {"", ""}, - } - for _, c := range cases { - got := StripTierSuffix(c.in) - if got != c.want { - t.Errorf("StripTierSuffix(%q) = %q, want %q", c.in, got, c.want) - } - } -} - -func TestStripTierSuffix_AfterBaseAlias(t *testing.T) { - // The combination used by aliasKeyFor: strip domain then tier. 
- cases := []struct{ in, want string }{ - {"alice-t0@corp.com", "alice"}, - {"alice-t1@corp.com", "alice"}, - {"alice@corp.com", "alice"}, - {"alice-t0", "alice"}, - {"alice", "alice"}, - } - for _, c := range cases { - got := StripTierSuffix(BaseAlias(c.in)) - if got != c.want { - t.Errorf("StripTierSuffix(BaseAlias(%q)) = %q, want %q", c.in, got, c.want) - } - } -} - -func TestDeduplicateByAlias_CollapsesSameBaseAcrossAccessors(t *testing.T) { - // "sbishop", "sbishop@hashicorp.com", "sbishop-t0", "sbishop-t1", and - // "sbishop" in a second file all normalize to "sbishop" → only the first - // occurrence across all files is kept. - records := []Record{ - {ClientID: "1", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "sbishop@hashicorp.com", MountAccessor: "auth_jwt_def456", Source: "jan.csv"}, // dup: normalizes to "sbishop" - {ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"}, // dup: tier stripped → "sbishop" - {ClientID: "4", EntityAliasName: "sbishop-t1", MountAccessor: "auth_oidc_xyz789", Source: "jan.csv"}, // dup: tier stripped → "sbishop" - {ClientID: "5", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"}, // dup: same normalized alias across files - {ClientID: "6", EntityAliasName: ""}, // kept: blank always kept - } - out := DeduplicateByAlias(records) - if len(out) != 2 { - t.Fatalf("expected 2 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "6"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - for _, id := range []string{"2", "3", "4", "5"} { - if kept[id] { - t.Errorf("expected ClientID=%s to be dropped", id) - } - } -} - -func TestDeduplicateByAlias_KeepsAllBlanks(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: ""}, - {ClientID: "2", EntityAliasName: ""}, - {ClientID: "3", 
EntityAliasName: "alice@corp.com", Source: "jan.csv"}, - } - out := DeduplicateByAlias(records) - if len(out) != 3 { - t.Fatalf("expected 3 records (2 blanks + 1 aliased), got %d", len(out)) - } -} - -func TestFindAliasDuplicates_SameBaseAcrossAccessors(t *testing.T) { - // "sbishop", "sbishop@hashicorp.com", "sbishop-t0", and "sbishop" in a - // second file all normalize to "sbishop" → one group with 4 members. - records := []Record{ - {ClientID: "1", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "sbishop@hashicorp.com", MountAccessor: "auth_jwt_def456", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"}, - {ClientID: "4", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"}, // cross-file dup - {ClientID: "5", EntityAliasName: ""}, // ignored - } - groups := FindAliasDuplicates(records) - if len(groups) != 1 { - t.Fatalf("expected 1 duplicate group, got %d", len(groups)) - } - if len(groups[0]) != 4 { - t.Errorf("expected 4 members in group, got %d", len(groups[0])) - } - for _, r := range groups[0] { - if StripTierSuffix(BaseAlias(r.EntityAliasName)) != "sbishop" { - t.Errorf("unexpected record in group: %+v", r) - } - } -} - -func TestFindAliasDuplicates_NoDuplicates(t *testing.T) { - // All different normalized aliases — no duplicates regardless of file. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "bob", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "carol", Source: "feb.csv"}, - {ClientID: "4", EntityAliasName: ""}, - } - groups := FindAliasDuplicates(records) - if len(groups) != 0 { - t.Errorf("expected no duplicate groups, got %d", len(groups)) - } -} - -func TestDeduplicateByAlias_IgnoresPKIClients(t *testing.T) { - // PKI clients are always kept regardless of alias duplication. 
- // Non-PKI clients with the same base alias in the same file are deduplicated. - records := []Record{ - {ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"}, // PKI, kept - {ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"}, // PKI, kept (not deduped) - {ClientID: "3", EntityAliasName: "abc-789", MountAccessor: "auth_cert_xyz", Source: "jan.csv"}, // cert auth — PKI, kept - {ClientID: "4", EntityAliasName: "alice@corp", Source: "jan.csv"}, // non-PKI, first: kept - {ClientID: "5", EntityAliasName: "alice@example.com", Source: "jan.csv"}, // non-PKI dup: base "alice" already seen, dropped - } - out := DeduplicateByAlias(records) - if len(out) != 4 { - t.Fatalf("expected 4 records (3 PKI/cert + 1 non-PKI), got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "2", "3", "4"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - if kept["5"] { - t.Errorf("expected ClientID=5 (non-PKI dup) to be dropped") - } -} - -func TestFindAliasDuplicates_IgnoresPKIClients(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "abc-789", MountAccessor: "auth_cert_xyz", Source: "jan.csv"}, - } - groups := FindAliasDuplicates(records) - if len(groups) != 0 { - t.Errorf("expected no duplicate groups (all PKI/cert), got %d", len(groups)) - } -} - // helpers for alias dedup tests func clientIDs(records []Record) []string { ids := make([]string, len(records)) @@ -547,7 +366,6 @@ func TestPartitionPKI_NoPKI(t *testing.T) { } } - func TestPartitionPKI_Empty(t *testing.T) { pki, nonPKI := PartitionPKI(nil, IsPKIClient) if pki != nil || nonPKI != nil { @@ -567,8 +385,8 @@ func TestFilterSincePerSource_FiltersTargetFileOnly(t *testing.T) { records := []Record{ // 
jan.csv: one record before cutoff, one after {ClientID: "j1", Source: "jan.csv", TokenCreationTime: jan15.Add(-24 * time.Hour)}, // before — excluded - {ClientID: "j2", Source: "jan.csv", TokenCreationTime: jan15}, // on cutoff — kept - {ClientID: "j3", Source: "jan.csv", TokenCreationTime: jan20}, // after — kept + {ClientID: "j2", Source: "jan.csv", TokenCreationTime: jan15}, // on cutoff — kept + {ClientID: "j3", Source: "jan.csv", TokenCreationTime: jan20}, // after — kept // feb.csv: not in filter map — all kept regardless of date {ClientID: "f1", Source: "feb.csv", TokenCreationTime: jan15.Add(-24 * time.Hour)}, // old but kept {ClientID: "f2", Source: "feb.csv", TokenCreationTime: feb01}, @@ -666,421 +484,6 @@ func TestFilterSincePerSource_EmptyMap(t *testing.T) { } } -// ── JWT deduplication ───────────────────────────────────────────────────────── - -func TestDeduplicateJWT_DropsJWTMatchingNonJWT(t *testing.T) { - // alice authenticates via LDAP (kept) and JWT (dropped — same normalized alias). - // bob has only a JWT record (kept — no non-JWT match). - // carol has a JWT record with no alias (always kept). 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // dropped: normalizes to "alice", matches LDAP - {ClientID: "3", EntityAliasName: "bob@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: no non-JWT match for "bob" - {ClientID: "4", EntityAliasName: "", MountType: "jwt", Source: "jan.csv"}, // kept: blank alias always kept - } - out := DeduplicateJWT(records) - if len(out) != 3 { - t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "3", "4"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - if kept["2"] { - t.Error("expected ClientID=2 (JWT dup of LDAP alice) to be dropped") - } -} - -func TestDeduplicateJWT_TierNormalizationApplied(t *testing.T) { - // LDAP alias is "alice-t0" (normalizes to "alice"). - // JWT alias is "alice@corp.com" (normalizes to "alice"). - // They match → JWT dropped. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, - } - out := DeduplicateJWT(records) - if len(out) != 1 { - t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out)) - } - if out[0].ClientID != "1" { - t.Errorf("expected LDAP record to be kept, got ClientID=%s", out[0].ClientID) - } -} - -func TestDeduplicateJWT_MatchesAcrossFiles(t *testing.T) { - // JWT record in feb.csv matches an LDAP alias in jan.csv — cross-file match - // is intentional, JWT record is dropped. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "feb.csv"}, - } - out := DeduplicateJWT(records) - if len(out) != 1 { - t.Fatalf("expected 1 record (cross-file JWT match dropped), got %d", len(out)) - } - if out[0].ClientID != "1" { - t.Errorf("expected LDAP record kept, got ClientID=%s", out[0].ClientID) - } -} - -func TestDeduplicateJWT_AuthMethodFallback(t *testing.T) { - // JWT identified via auth_method rather than mount_type. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", AuthMethod: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", AuthMethod: "jwt", Source: "jan.csv"}, - } - out := DeduplicateJWT(records) - if len(out) != 1 { - t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out)) - } - if out[0].ClientID != "1" { - t.Errorf("expected LDAP record kept, got ClientID=%s", out[0].ClientID) - } -} - -func TestDeduplicateJWT_NonJWTRecordsUnaffected(t *testing.T) { - // No JWT records — nothing should be dropped. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "bob", MountType: "oidc", Source: "jan.csv"}, - } - out := DeduplicateJWT(records) - if len(out) != 2 { - t.Fatalf("expected 2 records, got %d", len(out)) - } -} - -// ── combined alias + client_id deduplication ───────────────────────────────── - -func TestDeduplicateByAlias_ThenDeduplicate_CollapsesBothDimensions(t *testing.T) { - // --dedup-alias runs first (within-file tier/domain collapse), then -d - // (cross-file client_id collapse). Together they handle the case where the - // same person appears as different alias variants in the same file AND as the - // same client_id across multiple files. 
- // - // jan.csv: alice (id:1) and alice-t0 (id:2) → alias dedup keeps id:1, drops id:2 - // feb.csv: alice (id:1) → same client_id as jan.csv survivor → -d drops it - // jan.csv: bob (id:3) → distinct alias and id → kept throughout - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice-t0", Source: "jan.csv"}, // dropped by alias dedup (tier → "alice") - {ClientID: "1", EntityAliasName: "alice", Source: "feb.csv"}, // dropped by -d (same id as jan survivor) - {ClientID: "3", EntityAliasName: "bob", Source: "jan.csv"}, - } - - afterAlias := DeduplicateByAlias(records) - afterBoth := Deduplicate(afterAlias) - - if len(afterBoth) != 2 { - t.Fatalf("expected 2 records, got %d: %v", len(afterBoth), clientIDs(afterBoth)) - } - kept := clientIDSet(afterBoth) - if !kept["1"] { - t.Error("expected id:1 to be kept") - } - if !kept["3"] { - t.Error("expected id:3 to be kept") - } - if kept["2"] { - t.Error("expected id:2 to be dropped by alias dedup") - } -} - -func TestDeduplicateByAlias_CollapsesAcrossFiles(t *testing.T) { - // alice-t0 in jan.csv and alice-t1 in feb.csv both normalize to "alice" → - // alias dedup keeps only the first occurrence regardless of file. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice-t0", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice-t1", Source: "feb.csv"}, - } - - out := DeduplicateByAlias(records) - - if len(out) != 1 { - t.Fatalf("expected 1 record (cross-file tier collapse), got %d", len(out)) - } - if out[0].ClientID != "1" { - t.Errorf("expected first occurrence (id:1) to be kept, got id:%s", out[0].ClientID) - } -} - -func TestDeduplicateByAlias_CollapseOIDCWithLDAP(t *testing.T) { - // LDAP and OIDC share the same identity group, so the same normalized alias - // across both auth methods is treated as one client. - // JWT remains a separate group and is not collapsed here. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dup: ldap/oidc group, normalizes to "alice" - {ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"}, // dup: ldap/oidc group, tier stripped → "alice" - {ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: jwt is a separate group - {ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"}, // kept: different alias - } - out := DeduplicateByAlias(records) - if len(out) != 3 { - t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "4", "5"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - for _, id := range []string{"2", "3"} { - if kept[id] { - t.Errorf("expected ClientID=%s to be dropped (same ldap/oidc group)", id) - } - } -} - -func TestDeduplicateByAlias_ScopedToMountType(t *testing.T) { - // alice on LDAP and alice@corp.com on JWT share a normalized alias but have - // different mount types → --dedup-alias does NOT collapse them. Use - // --dedup-jwt to additionally collapse cross-auth-method duplicates. - // alice-t0 and alice on LDAP share both the normalized alias AND mount type - // → they ARE collapsed. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"}, // dup: same type + base - {ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: different mount type - } - out := DeduplicateByAlias(records) - if len(out) != 2 { - t.Fatalf("expected 2 records (alice/ldap + alice/jwt), got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - if !kept["1"] { - t.Error("expected id:1 (alice ldap) to be kept") - } - if !kept["3"] { - t.Error("expected id:3 (alice jwt) to be kept — different mount type, requires --dedup-jwt") - } - if kept["2"] { - t.Error("expected id:2 (alice-t0 ldap) to be dropped — same mount type and normalized alias") - } -} - -// Regression: tiered accounts across files must be collapsed. -// Before the fix, aliasKey included the source filename, so alice-t0 in -// jan.csv and alice in feb.csv hashed to different keys and were never -// compared — each was counted as a separate client. 
-func TestDeduplicateByAlias_TieredAccountsAcrossFiles(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: "alice-t0", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice", Source: "feb.csv"}, // same person, different tier label - {ClientID: "3", EntityAliasName: "alice-t1", Source: "mar.csv"}, // same person, third file - {ClientID: "4", EntityAliasName: "bob", Source: "jan.csv"}, // different person, kept - } - out := DeduplicateByAlias(records) - if len(out) != 2 { - t.Fatalf("expected 2 records (alice collapsed to 1, bob kept), got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - if !kept["1"] { - t.Error("expected first alice occurrence (id:1) to be kept") - } - if !kept["4"] { - t.Error("expected bob (id:4) to be kept") - } - for _, id := range []string{"2", "3"} { - if kept[id] { - t.Errorf("expected ClientID=%s (tier variant of alice) to be dropped", id) - } - } -} - -// ── method-scoped alias deduplication ──────────────────────────────────────── - -func TestDeduplicateByAliasForMethods_LDAPAndOIDCGroup(t *testing.T) { - // Same as -dedup-alias LDAP/OIDC behavior, but specified explicitly. - // alice via LDAP is kept; alice@corp.com via OIDC is dropped (same group). - // alice via JWT is kept (not in the group). 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped - {ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"}, // dropped: tier stripped - {ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: jwt not in group - {ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"}, // kept: different alias - } - groups := [][]string{{"ldap", "oidc"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 3 { - t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "4", "5"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - for _, id := range []string{"2", "3"} { - if kept[id] { - t.Errorf("expected ClientID=%s to be dropped", id) - } - } -} - -func TestDeduplicateByAliasForMethods_MethodsNotInGroupPassThrough(t *testing.T) { - // approle records are not in any group and must pass through untouched, - // even if two share the same alias. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "svc-account", MountType: "approle", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "svc-account", MountType: "approle", Source: "jan.csv"}, // NOT deduped - {ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped - } - groups := [][]string{{"ldap", "oidc"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 3 { - t.Fatalf("expected 3 records (2 approle + 1 ldap), got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "2", "3"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - if kept["4"] { - t.Error("expected ClientID=4 (oidc dup) to be dropped") - } -} - -func TestDeduplicateByAliasForMethods_MultipleIndependentGroups(t *testing.T) { - // Group 1: {ldap, oidc}; Group 2: {jwt, saml} - // alice/ldap and alice/oidc collapse → 1 kept - // alice/jwt and alice/saml collapse → 1 kept - // The two groups don't interact with each other. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped (group 1) - {ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept (group 2 first) - {ClientID: "4", EntityAliasName: "alice", MountType: "saml", Source: "jan.csv"}, // dropped (group 2) - {ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"}, // kept: different alias - } - groups := [][]string{{"ldap", "oidc"}, {"jwt", "saml"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 3 { - t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "3", "5"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - for _, id := range []string{"2", "4"} { - if kept[id] { - t.Errorf("expected ClientID=%s to be dropped", id) - } - } -} - -func TestDeduplicateByAliasForMethods_ThreeMethodsOneGroup(t *testing.T) { - // ldap, oidc, jwt all in one group — alice across all three collapses to 1. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped - {ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // dropped - {ClientID: "4", EntityAliasName: "bob", MountType: "jwt", Source: "jan.csv"}, // kept: different alias - } - groups := [][]string{{"ldap", "oidc", "jwt"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 2 { - t.Fatalf("expected 2 records, got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - if !kept["1"] { - t.Error("expected id:1 (alice ldap, first occurrence) to be kept") - } - if !kept["4"] { - t.Error("expected id:4 (bob) to be kept") - } -} - -func TestDeduplicateByAliasForMethods_BlankAliasAlwaysKept(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: "", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "", MountType: "oidc", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - } - groups := [][]string{{"ldap", "oidc"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 3 { - t.Fatalf("expected 3 records (2 blank + 1 aliased), got %d", len(out)) - } -} - -func TestDeduplicateByAliasForMethods_PKIClientsAlwaysKept(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "abc-123", ClientType: "acme", MountType: "oidc", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped - } - groups := [][]string{{"ldap", "oidc"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 3 { - t.Fatalf("expected 3 records (2 
PKI + 1 non-PKI), got %d: %v", len(out), clientIDs(out)) - } - kept := clientIDSet(out) - for _, id := range []string{"1", "2", "3"} { - if !kept[id] { - t.Errorf("expected ClientID=%s to be kept", id) - } - } - if kept["4"] { - t.Error("expected ClientID=4 to be dropped") - } -} - -func TestDeduplicateByAliasForMethods_AuthMethodFallback(t *testing.T) { - // MountType is blank; dedup should fall back to AuthMethod. - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", AuthMethod: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", AuthMethod: "oidc", Source: "jan.csv"}, // dropped - } - groups := [][]string{{"ldap", "oidc"}} - out := DeduplicateByAliasForMethods(records, groups) - if len(out) != 1 { - t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out)) - } - if out[0].ClientID != "1" { - t.Errorf("expected id:1 to be kept, got %s", out[0].ClientID) - } -} - -func TestFindAliasDuplicatesForMethods_ReportsGroupsOnly(t *testing.T) { - // Only ldap and oidc records should be reported as duplicates. - // approle records with the same alias are not in the group and not reported. 
- records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, - {ClientID: "3", EntityAliasName: "alice", MountType: "approle", Source: "jan.csv"}, // not in group - } - groups := [][]string{{"ldap", "oidc"}} - dups := FindAliasDuplicatesForMethods(records, groups) - if len(dups) != 1 { - t.Fatalf("expected 1 duplicate group, got %d", len(dups)) - } - if len(dups[0]) != 2 { - t.Errorf("expected 2 members in group (ldap + oidc), got %d", len(dups[0])) - } -} - -func TestFindAliasDuplicatesForMethods_NoDuplicates(t *testing.T) { - records := []Record{ - {ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"}, - {ClientID: "2", EntityAliasName: "bob", MountType: "oidc", Source: "jan.csv"}, - } - groups := [][]string{{"ldap", "oidc"}} - dups := FindAliasDuplicatesForMethods(records, groups) - if len(dups) != 0 { - t.Errorf("expected no duplicate groups, got %d", len(dups)) - } -} - // ── input mutation safety ───────────────────────────────────────────────────── // These tests guard against the records[:0] pattern, which reuses the backing // array and silently corrupts the caller's slice. Each filter must not modify diff --git a/internal/parser/parser.go b/internal/parser/parser.go index 98a1199..394b757 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -17,23 +17,26 @@ type RawRecord struct { // Source tracks which file this record came from. 
Source string - ClientID string - NamespaceID string - NamespacePath string - MountAccessor string - MountPath string - MountType string - AuthMethod string - ClientType string - TokenCreationTime string // may be populated from legacy "timestamp" column - ClientFirstUsageTime string - EntityAliasName string + ClientID string + EntityName string + NamespaceID string + NamespacePath string + MountAccessor string + MountPath string + MountType string + AuthMethod string + ClientType string + TokenCreationTime string // may be populated from legacy "timestamp" column + ClientFirstUsageTime string + EntityAliasName string + EntityAliasMetadataUsername string } // knownColumns maps all recognised (lowercased, trimmed) header variants to // a canonical field name used by the column mapper below. var knownColumns = map[string]string{ "client_id": "client_id", + "entity_name": "entity_name", "namespace_id": "namespace_id", "namespace_path": "namespace_path", "mount_accessor": "mount_accessor", @@ -43,7 +46,8 @@ var knownColumns = map[string]string{ "client_type": "client_type", "token_creation_time": "token_creation_time", "client_first_usage_time": "client_first_usage_time", - "entity_alias_name": "entity_alias_name", + "entity_alias_name": "entity_alias_name", + "entity_alias_metadata.username": "entity_alias_metadata_username", // Legacy / alternative column names: "timestamp": "token_creation_time", // Vault < 1.17 "first_seen": "client_first_usage_time", @@ -123,18 +127,20 @@ func parseReader(r io.Reader, source string) ([]RawRecord, error) { } records = append(records, RawRecord{ - Source: source, - ClientID: clientID, - NamespaceID: get(row, "namespace_id"), - NamespacePath: get(row, "namespace_path"), - MountAccessor: get(row, "mount_accessor"), - MountPath: get(row, "mount_path"), - MountType: get(row, "mount_type"), - AuthMethod: get(row, "auth_method"), - ClientType: get(row, "client_type"), - TokenCreationTime: get(row, "token_creation_time"), - 
ClientFirstUsageTime: get(row, "client_first_usage_time"), - EntityAliasName: get(row, "entity_alias_name"), + Source: source, + ClientID: clientID, + EntityName: get(row, "entity_name"), + NamespaceID: get(row, "namespace_id"), + NamespacePath: get(row, "namespace_path"), + MountAccessor: get(row, "mount_accessor"), + MountPath: get(row, "mount_path"), + MountType: get(row, "mount_type"), + AuthMethod: get(row, "auth_method"), + ClientType: get(row, "client_type"), + TokenCreationTime: get(row, "token_creation_time"), + ClientFirstUsageTime: get(row, "client_first_usage_time"), + EntityAliasName: get(row, "entity_alias_name"), + EntityAliasMetadataUsername: get(row, "entity_alias_metadata_username"), }) } diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index e1a1f02..21d7e93 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -6,9 +6,9 @@ import ( ) func TestParseReader_StandardColumns(t *testing.T) { - csv := `client_id,namespace_id,namespace_path,mount_accessor,mount_path,mount_type,auth_method,client_type,token_creation_time,client_first_usage_time -abc-123,root,[root],auth_approle_abc,auth/approle/,approle,approle,entity,2024-01-15T10:00:00Z,2024-01-15T12:00:00Z -def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T08:00:00Z, + csv := `client_id,entity_name,namespace_id,namespace_path,mount_accessor,mount_path,mount_type,auth_method,client_type,token_creation_time,client_first_usage_time +abc-123,Alice Smith,root,[root],auth_approle_abc,auth/approle/,approle,approle,entity,2024-01-15T10:00:00Z,2024-01-15T12:00:00Z +def-456,,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T08:00:00Z, ` records, err := parseReader(strings.NewReader(csv), "test.csv") if err != nil { @@ -20,6 +20,7 @@ def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T r := records[0] assertEqual(t, "client_id", "abc-123", r.ClientID) + assertEqual(t, 
"entity_name", "Alice Smith", r.EntityName) assertEqual(t, "namespace_id", "root", r.NamespaceID) assertEqual(t, "namespace_path", "[root]", r.NamespacePath) assertEqual(t, "mount_accessor", "auth_approle_abc", r.MountAccessor) @@ -30,6 +31,7 @@ def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T assertEqual(t, "client_first_usage_time", "2024-01-15T12:00:00Z", r.ClientFirstUsageTime) r2 := records[1] + assertEqual(t, "entity_name_empty", "", r2.EntityName) assertEqual(t, "client_first_usage_time_empty", "", r2.ClientFirstUsageTime) } diff --git a/internal/renderer/renderer.go b/internal/renderer/renderer.go index 2d788ff..b1e2659 100644 --- a/internal/renderer/renderer.go +++ b/internal/renderer/renderer.go @@ -27,16 +27,6 @@ var columns = []column{ width: 16, get: func(r normalizer.Record) string { return r.NamespacePath }, }, - { - header: "Client Type", - width: 12, - get: func(r normalizer.Record) string { return r.ClientType }, - }, - { - header: "Auth Method", - width: 12, - get: func(r normalizer.Record) string { return r.AuthMethod }, - }, { header: "Mount Path", width: 12, @@ -74,24 +64,44 @@ var aliasColumn = column{ get: func(r normalizer.Record) string { return r.EntityAliasName }, } +var oidcUsernameColumn = column{ + header: "OIDC Username", + width: 13, + get: func(r normalizer.Record) string { return r.EntityAliasMetadataUsername }, +} + // PrintTable writes the records as a plain-text table to w. If any record has // a non-empty EntityAliasName, an Entity Alias column is appended so the -// original alias values are visible in alias deduplication output. +// original alias values are visible in alias deduplication output. If any +// record has a non-empty EntityAliasMetadataUsername, an OIDC Username column +// is also appended. 
func PrintTable(w io.Writer, records []normalizer.Record) { if len(records) == 0 { fmt.Fprintln(w, "(no records to display)") return } - // Build column list, appending the alias column only when the data has it. + // Build column list, appending extra columns only when the data has them. cols := make([]column, len(columns)) copy(cols, columns) + var hasAlias, hasOIDCUsername bool for _, r := range records { if r.EntityAliasName != "" { - cols = append(cols, aliasColumn) + hasAlias = true + } + if r.EntityAliasMetadataUsername != "" { + hasOIDCUsername = true + } + if hasAlias && hasOIDCUsername { break } } + if hasAlias { + cols = append(cols, aliasColumn) + } + if hasOIDCUsername { + cols = append(cols, oidcUsernameColumn) + } for _, r := range records { for i, c := range cols { diff --git a/internal/renderer/renderer_test.go b/internal/renderer/renderer_test.go index 383b91c..f775f60 100644 --- a/internal/renderer/renderer_test.go +++ b/internal/renderer/renderer_test.go @@ -46,8 +46,8 @@ func TestPrintTable_RendersRows(t *testing.T) { if !strings.Contains(out, "Namespace Path") { t.Error("expected header 'Namespace Path'") } - if !strings.Contains(out, "Client Type") { - t.Error("expected header 'Client Type'") + if strings.Contains(out, "Client Type") { + t.Error("unexpected header 'Client Type' — removed from table output") } // Data rows present @@ -57,9 +57,6 @@ func TestPrintTable_RendersRows(t *testing.T) { if !strings.Contains(out, "education/") { t.Error("expected namespace 'education/'") } - if !strings.Contains(out, "non-entity") { - t.Error("expected client type 'non-entity'") - } } func TestPrintTable_ZeroTimeFmtDash(t *testing.T) { diff --git a/internal/tfgen/tfgen.go b/internal/tfgen/tfgen.go new file mode 100644 index 0000000..a0abdb7 --- /dev/null +++ b/internal/tfgen/tfgen.go @@ -0,0 +1,201 @@ +// Package tfgen generates Terraform HCL to consolidate per-auth-method Vault +// client records into a single entity with multiple aliases. 
Each duplicate +// group found by --dedup-methods-per-file becomes one vault_identity_entity +// and one vault_identity_entity_alias per auth method in that group. +package tfgen + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/vault-csv-normalizer/internal/normalizer" +) + +var petAdjectives = []string{ + "amber", "bold", "calm", "dark", "eager", "fair", "glad", "hardy", + "ivory", "jolly", "keen", "lofty", "merry", "noble", "proud", + "quiet", "rapid", "silver", "tawny", "vivid", "warm", "young", +} + +var petNouns = []string{ + "bear", "crane", "deer", "eagle", "fox", "goose", "hawk", "ibis", + "jay", "kite", "lark", "mole", "newt", "otter", "panda", "quail", + "raven", "swift", "teal", "vole", "wren", "yak", "zebra", +} + +// nextPetName picks the next unused adj_noun name from the wordlists. +func nextPetName(used map[string]struct{}, counter *int) string { + total := len(petAdjectives) * len(petNouns) + for i := 0; i < total; i++ { + idx := (*counter + i) % total + name := petAdjectives[idx/len(petNouns)] + "_" + petNouns[idx%len(petNouns)] + if _, ok := used[name]; !ok { + *counter = (idx + 1) % total + used[name] = struct{}{} + return name + } + } + *counter++ + name := fmt.Sprintf("entity_%d", *counter) + used[name] = struct{}{} + return name +} + +// sanitizeID converts a string to a valid Terraform identifier. +func sanitizeID(s string) string { + var b strings.Builder + for _, ch := range strings.ToLower(s) { + if (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') { + b.WriteRune(ch) + } else { + b.WriteByte('_') + } + } + result := b.String() + for strings.Contains(result, "__") { + result = strings.ReplaceAll(result, "__", "_") + } + return strings.Trim(result, "_") +} + +// effectiveAlias returns the human-readable alias for a record. For OIDC, +// entity_alias_metadata_username holds the username; entity_alias_name may be +// a subject identifier that doesn't match other methods. 
+func effectiveAlias(r normalizer.Record) string { + if (r.MountType == "oidc" || r.AuthMethod == "oidc") && r.EntityAliasMetadataUsername != "" { + return r.EntityAliasMetadataUsername + } + return r.EntityAliasName +} + +// GenerateTF writes Terraform HCL to outputPath for each alias duplicate group. +// groups is the output of normalizer.FindAliasDuplicatesForMethodsPerFile — each +// inner slice is a set of records that represent the same person authenticated via +// different auth methods. For each group, one vault_identity_entity resource and +// one vault_identity_entity_alias per record are generated, consolidating the +// separate per-method client records into a single Vault entity. +// Returns the number of entity stubs written. +func GenerateTF(groups [][]normalizer.Record, outputPath string) (int, error) { + if len(groups) == 0 { + return 0, nil + } + + type mountInfo struct { + accessor string + mountPath string + mountType string + varName string + } + + // Collect unique mount accessors across all groups. 
+ accessorSeen := make(map[string]*mountInfo) + var accessorOrder []string + for _, group := range groups { + for _, r := range group { + if r.MountAccessor == "" || accessorSeen[r.MountAccessor] != nil { + continue + } + mi := &mountInfo{ + accessor: r.MountAccessor, + mountPath: r.MountPath, + mountType: r.MountType, + varName: "accessor_" + sanitizeID(r.MountAccessor), + } + accessorSeen[r.MountAccessor] = mi + accessorOrder = append(accessorOrder, r.MountAccessor) + } + } + + var sb strings.Builder + + sb.WriteString("# Generated by vault-csv-normalizer\n") + sb.WriteString("# Each entity block consolidates records that represent the same person\n") + sb.WriteString("# authenticated via different auth methods within a single billing period.\n") + sb.WriteString("# Verify names before applying.\n\n") + + for _, acc := range accessorOrder { + mi := accessorSeen[acc] + desc := mi.mountPath + if mi.mountType != "" { + desc += " (" + mi.mountType + ")" + } + sb.WriteString(fmt.Sprintf("variable %q {\n", mi.varName)) + sb.WriteString(fmt.Sprintf(" description = %q\n", "Mount accessor for "+strings.TrimSpace(desc))) + sb.WriteString(" type = string\n") + sb.WriteString(fmt.Sprintf(" default = %q\n", mi.accessor)) + sb.WriteString("}\n\n") + } + + // Group duplicate groups by source file, preserving order of first appearance. 
+ var fileOrder []string + byFile := make(map[string][][]normalizer.Record) + for _, group := range groups { + source := filepath.Base(group[0].Source) + if _, ok := byFile[source]; !ok { + fileOrder = append(fileOrder, source) + } + byFile[source] = append(byFile[source], group) + } + + used := make(map[string]struct{}) + counter := 0 + + for _, source := range fileOrder { + fileGroups := byFile[source] + divider := strings.Repeat("#", 60) + sb.WriteString(fmt.Sprintf("%s\n# Source: %s (%d alias group(s))\n%s\n\n", + divider, source, len(fileGroups), divider)) + + for _, group := range fileGroups { + r0 := group[0] + entityName := normalizer.BaseAlias(effectiveAlias(r0)) + + methods := make([]string, 0, len(group)) + for _, r := range group { + mt := r.MountType + if mt == "" { + mt = r.AuthMethod + } + methods = append(methods, mt) + } + + petname := nextPetName(used, &counter) + + sb.WriteString(fmt.Sprintf("# alias: %s | methods: %s\n", + entityName, strings.Join(methods, ", "))) + + sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity\" %q {\n", petname)) + sb.WriteString(fmt.Sprintf(" name = %q\n", entityName)) + sb.WriteString("}\n\n") + + for i, r := range group { + mt := r.MountType + if mt == "" { + mt = r.AuthMethod + } + aliasName := r.EntityAliasName + if aliasName == "" { + aliasName = "TODO" + } + aliasResource := fmt.Sprintf("%s_%d", petname, i) + + sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity_alias\" %q {\n", aliasResource)) + sb.WriteString(fmt.Sprintf(" name = %q # %s\n", aliasName, mt)) + if mi := accessorSeen[r.MountAccessor]; mi != nil { + sb.WriteString(fmt.Sprintf(" mount_accessor = var.%s\n", mi.varName)) + } else { + sb.WriteString(" mount_accessor = \"TODO\" # mount_accessor not in export\n") + } + sb.WriteString(fmt.Sprintf(" canonical_id = vault_identity_entity.%s.id\n", petname)) + sb.WriteString("}\n\n") + } + } + } + + if err := os.WriteFile(outputPath, []byte(sb.String()), 0644); err != nil { + return 0, 
fmt.Errorf("writing %s: %w", outputPath, err) + } + return len(groups), nil +} diff --git a/internal/tfgen/tfgen_test.go b/internal/tfgen/tfgen_test.go new file mode 100644 index 0000000..84edb44 --- /dev/null +++ b/internal/tfgen/tfgen_test.go @@ -0,0 +1,252 @@ +package tfgen + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/vault-csv-normalizer/internal/normalizer" +) + +func TestGenerateTF_EmptyGroups(t *testing.T) { + out := filepath.Join(t.TempDir(), "out.tf") + n, err := GenerateTF(nil, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if n != 0 { + t.Errorf("expected 0 stubs, got %d", n) + } + if _, err := os.Stat(out); !os.IsNotExist(err) { + t.Error("expected no file to be written for empty groups") + } +} + +func TestGenerateTF_SingleGroup(t *testing.T) { + groups := [][]normalizer.Record{ + { + { + ClientID: "ldap-001", + Source: "jan.csv", + MountAccessor: "auth_ldap_abc", + MountPath: "auth/ldap/", + MountType: "ldap", + ClientType: "entity", + EntityAliasName: "alice", + }, + { + ClientID: "oidc-001", + Source: "jan.csv", + MountAccessor: "auth_oidc_xyz", + MountPath: "auth/oidc/", + MountType: "oidc", + ClientType: "entity", + EntityAliasName: "alice@corp.com", + }, + }, + } + + out := filepath.Join(t.TempDir(), "out.tf") + n, err := GenerateTF(groups, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if n != 1 { + t.Errorf("expected 1 stub, got %d", n) + } + + content, err := os.ReadFile(out) + if err != nil { + t.Fatalf("reading output: %v", err) + } + tf := string(content) + + // One entity resource + if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 1 { + t.Errorf("expected 1 vault_identity_entity resource, got %d", count) + } + // Two alias resources (one per record in the group) + if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 2 { + t.Errorf("expected 2 vault_identity_entity_alias resources, got %d", count) + } + // 
Entity name uses base alias (no domain) + if !strings.Contains(tf, `name = "alice"`) { + t.Error("expected entity name to be the base alias \"alice\"") + } + // LDAP alias name preserved as-is + if !strings.Contains(tf, `name = "alice" # ldap`) { + t.Error("expected LDAP alias name \"alice\"") + } + // OIDC alias name preserved as-is (full email) + if !strings.Contains(tf, `name = "alice@corp.com" # oidc`) { + t.Error("expected OIDC alias name \"alice@corp.com\"") + } + // Variables declared for both mount accessors + if !strings.Contains(tf, `variable "accessor_auth_ldap_abc"`) { + t.Error("expected variable for auth_ldap_abc") + } + if !strings.Contains(tf, `variable "accessor_auth_oidc_xyz"`) { + t.Error("expected variable for auth_oidc_xyz") + } + // canonical_id references the entity resource + if !strings.Contains(tf, "vault_identity_entity.") { + t.Error("expected canonical_id referencing vault_identity_entity") + } +} + +func TestGenerateTF_MultipleGroups(t *testing.T) { + groups := [][]normalizer.Record{ + { + {ClientID: "ldap-001", Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"}, + {ClientID: "oidc-001", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"}, + }, + { + {ClientID: "ldap-002", Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "bob"}, + {ClientID: "oidc-002", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "bob@corp.com"}, + }, + } + + out := filepath.Join(t.TempDir(), "out.tf") + n, err := GenerateTF(groups, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if n != 2 { + t.Errorf("expected 2 stubs, got %d", n) + } + + content, _ := os.ReadFile(out) + tf := string(content) + + if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 
2 { + t.Errorf("expected 2 vault_identity_entity resources, got %d", count) + } + if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 4 { + t.Errorf("expected 4 vault_identity_entity_alias resources, got %d", count) + } + // Shared mount accessors declared only once each + if count := strings.Count(tf, `variable "accessor_auth_ldap_abc"`); count != 1 { + t.Errorf("expected mount accessor variable declared once, got %d", count) + } + if count := strings.Count(tf, `variable "accessor_auth_oidc_xyz"`); count != 1 { + t.Errorf("expected mount accessor variable declared once, got %d", count) + } +} + +func TestGenerateTF_PetnamesAreUnique(t *testing.T) { + // Build enough groups to exercise multiple petname assignments. + aliases := []string{"alice", "bob", "carol", "dave", "eve"} + groups := make([][]normalizer.Record, len(aliases)) + for i, alias := range aliases { + groups[i] = []normalizer.Record{ + {ClientID: "ldap-" + alias, Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: alias}, + {ClientID: "oidc-" + alias, Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: alias + "@corp.com"}, + } + } + + out := filepath.Join(t.TempDir(), "out.tf") + _, err := GenerateTF(groups, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + content, _ := os.ReadFile(out) + tf := string(content) + + // Extract resource names and verify uniqueness. 
+ seen := make(map[string]int) + for _, line := range strings.Split(tf, "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "resource \"vault_identity_entity\" ") { + name := strings.Trim(strings.Fields(line)[2], `"{ `) + seen[name]++ + } + } + for name, count := range seen { + if count > 1 { + t.Errorf("petname %q used %d times — names must be unique", name, count) + } + } +} + +func TestGenerateTF_GroupedByFile(t *testing.T) { + groups := [][]normalizer.Record{ + // jan.csv — alice + { + {ClientID: "ldap-001", Source: "/data/jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"}, + {ClientID: "oidc-001", Source: "/data/jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"}, + }, + // feb.csv — alice (same person, different file) + { + {ClientID: "ldap-002", Source: "/data/feb.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"}, + {ClientID: "oidc-002", Source: "/data/feb.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"}, + }, + // feb.csv — bob (second group in the same file) + { + {ClientID: "ldap-003", Source: "/data/feb.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "bob"}, + {ClientID: "oidc-003", Source: "/data/feb.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "bob@corp.com"}, + }, + } + + out := filepath.Join(t.TempDir(), "out.tf") + n, err := GenerateTF(groups, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if n != 3 { + t.Errorf("expected 3 stubs, got %d", n) + } + + content, _ := os.ReadFile(out) + tf := string(content) + + // File headers present for both source files. 
+ if !strings.Contains(tf, "Source: jan.csv") { + t.Error("expected file header for jan.csv") + } + if !strings.Contains(tf, "Source: feb.csv") { + t.Error("expected file header for feb.csv") + } + // jan.csv header shows 1 group, feb.csv header shows 2 groups. + if !strings.Contains(tf, "jan.csv (1 alias group(s))") { + t.Error("expected jan.csv to show 1 alias group") + } + if !strings.Contains(tf, "feb.csv (2 alias group(s))") { + t.Error("expected feb.csv to show 2 alias groups") + } + // jan.csv header appears before feb.csv header. + if strings.Index(tf, "jan.csv") > strings.Index(tf, "feb.csv") { + t.Error("expected jan.csv section before feb.csv section") + } + // Total: 3 entities, 6 aliases. + if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 3 { + t.Errorf("expected 3 vault_identity_entity resources, got %d", count) + } + if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 6 { + t.Errorf("expected 6 vault_identity_entity_alias resources, got %d", count) + } +} + +func TestGenerateTF_MissingMountAccessor(t *testing.T) { + groups := [][]normalizer.Record{ + { + {ClientID: "ldap-001", Source: "jan.csv", MountAccessor: "", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"}, + {ClientID: "oidc-001", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"}, + }, + } + + out := filepath.Join(t.TempDir(), "out.tf") + _, err := GenerateTF(groups, out) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + content, _ := os.ReadFile(out) + tf := string(content) + + // Record with no mount_accessor gets a TODO placeholder, not a var reference. + if !strings.Contains(tf, `mount_accessor = "TODO"`) { + t.Error("expected TODO placeholder for missing mount_accessor") + } +}