From 796704299955fe285aa17a80ebd46d7ce31d387e Mon Sep 17 00:00:00 2001
From: Schuyler Bishop <schuyler@ibm.com>
Date: Thu, 21 May 2026 13:51:39 -0500
Subject: [PATCH 1/8] adding feature that allows deduplication across methods
 per-file

---
 cmd/vault-csv-normalizer/main.go   |  57 ++++++++++-
 internal/normalizer/normalizer.go  | 148 ++++++++++++++++++++++++++---
 internal/parser/parser.go          |  51 +++++-----
 internal/renderer/renderer.go      |  36 ++++---
 internal/renderer/renderer_test.go |   7 +-
 5 files changed, 240 insertions(+), 59 deletions(-)

diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index 1bcdb30..97b64bb 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -40,6 +40,7 @@ func (f fileDateFlag) Set(v string) error {
 func main() {
 	var inputFiles multiFlag
 	var dedupMethods multiFlag
+	var dedupMethodsPerFile multiFlag
 	var sortBy string
 	var filterNS string
 	var filterType string
@@ -64,6 +65,7 @@ func main() {
 	flag.BoolVar(&dedup, "d", false, "Deduplicate records by client_id across all input files")
 	flag.BoolVar(&dedupAlias, "dedup-alias", false, "Deduplicate by entity_alias_name (strips domain and -t0/-t1/-t2 tier suffixes; records without an alias are always kept; may be combined with -d)")
 	flag.Var(&dedupMethods, "dedup-methods", "Deduplicate by alias for the specified comma-separated auth methods, treating them as one identity group. Repeatable to define multiple groups (e.g. -dedup-methods ldap,oidc -dedup-methods jwt,saml).")
+	flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Like --dedup-methods but scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.")
 	flag.BoolVar(&dedupJWT, "dedup-jwt", false, "Drop JWT records whose normalized alias matches a non-JWT record in the same file (prevents counting the same person via both LDAP/OIDC and JWT)")
 	flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods groups.")
 	flag.BoolVar(&debugMode, "debug", false, "Print all records grouped by mount path")
@@ -131,6 +133,20 @@ func main() {
 		}
 	}
 
+	var methodGroupsPerFile [][]string
+	for _, val := range dedupMethodsPerFile {
+		var group []string
+		for _, m := range strings.Split(val, ",") {
+			m = strings.TrimSpace(strings.ToLower(m))
+			if m != "" {
+				group = append(group, m)
+			}
+		}
+		if len(group) > 0 {
+			methodGroupsPerFile = append(methodGroupsPerFile, group)
+		}
+	}
+
 	// Snapshot pre-dedup records so debug mode can show alias groups from the
 	// original data regardless of which dedup flags are active.
 	preDedup := normalized
@@ -164,6 +180,21 @@ func main() {
 		}
 		normalized = normalizer.DeduplicateByAliasForMethods(normalized, methodGroups)
 	}
+	if len(methodGroupsPerFile) > 0 {
+		groups := normalizer.FindAliasDuplicatesForMethodsPerFile(preDedup, methodGroupsPerFile)
+		if len(groups) > 0 {
+			fmt.Fprintf(os.Stdout, "Per-file method-scoped alias duplicates found (%d group(s))\n", len(groups))
+			fmt.Fprintln(os.Stdout, "=====================================================")
+			for _, group := range groups {
+				r0 := group[0]
+				fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
+					normalizer.BaseAlias(r0.EntityAliasName), filepath.Base(r0.Source)) // per-file keys keep tier suffixes, so the heading must too
+				renderer.PrintTable(os.Stdout, group)
+			}
+			fmt.Fprintln(os.Stdout)
+		}
+		normalized = normalizer.DeduplicateByAliasForMethodsPerFile(normalized, methodGroupsPerFile)
+	}
 
 	// Collect -d dedup statistics before running so debug mode can report
 	// exactly which client_ids were (or weren't) collapsed.
@@ -307,7 +338,7 @@ func main() {
 		fmt.Fprintln(os.Stdout)
 	}
 
-	if perFile {
+	if (perFile || len(methodGroupsPerFile) > 0) && len(inputFiles) > 1 {
 		bySource := make(map[string][]normalizer.Record, len(inputFiles))
 		for _, r := range normalized {
 			bySource[r.Source] = append(bySource[r.Source], r)
@@ -466,5 +497,27 @@ CSV FORMAT (Vault activity export):
             record and a JWT record for the same person are not collapsed
             (unless both groups are merged into one).
 
-      Can be combined with --dedup-alias, --dedup-jwt, and/or -d.`)
+      Can be combined with --dedup-alias, --dedup-jwt, and/or -d.
+
+  --dedup-methods-per-file <method1,method2,...>
+      Like --dedup-methods but deduplication is scoped to each input file
+      independently. Records in different files with the same normalized alias
+      are NOT collapsed against each other — only within-file duplicates are
+      removed. Useful when files represent different billing periods and you
+      want to count a returning user once per file rather than once globally.
+
+      Uses the same method-grouping syntax as --dedup-methods (repeatable,
+      comma-separated groups). Tier suffixes (-t0/-t1/-t2) are NOT stripped.
+
+        --dedup-methods-per-file ldap,oidc
+            Within each file, collapse LDAP and OIDC records that share the
+            same alias (exact match; tier suffixes like -t0/-t1 are distinct).
+            A user in jan.csv (LDAP) and feb.csv (OIDC) is NOT collapsed —
+            they appear once per file.
+
+        --dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml
+            Two independent per-file groups. Each group is collapsed on its own,
+            strictly within each source file.
+
+      Can be combined with --dedup-methods, --dedup-alias, --dedup-jwt, and/or -d.`)
 }
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index 5b3e543..efd20fd 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -23,9 +23,10 @@ type Record struct {
 	MountType            string
 	AuthMethod           string
 	ClientType           string // normalized: entity | non-entity | acme | secret-sync | unknown
-	TokenCreationTime    time.Time
-	ClientFirstUsageTime time.Time
-	EntityAliasName      string
+	TokenCreationTime           time.Time
+	ClientFirstUsageTime        time.Time
+	EntityAliasName             string
+	EntityAliasMetadataUsername string
 }
 
 // supportedSortKeys lists columns accepted by Sort.
@@ -51,18 +52,19 @@ func Normalize(raw []parser.RawRecord) []Record {
 
 func normalizeOne(r parser.RawRecord) Record {
 	return Record{
-		Source:               r.Source,
-		ClientID:             r.ClientID,
-		NamespaceID:          normalizeNamespaceID(r.NamespaceID),
-		NamespacePath:        normalizeNamespacePath(r.NamespacePath),
-		MountAccessor:        strings.TrimSpace(r.MountAccessor),
-		MountPath:            normalizeMountPath(r.MountPath),
-		MountType:            strings.ToLower(strings.TrimSpace(r.MountType)),
-		AuthMethod:           strings.ToLower(strings.TrimSpace(r.AuthMethod)),
-		ClientType:           normalizeClientType(r.ClientType),
-		TokenCreationTime:    ParseTime(r.TokenCreationTime),
-		ClientFirstUsageTime: ParseTime(r.ClientFirstUsageTime),
-		EntityAliasName:      strings.TrimSpace(r.EntityAliasName),
+		Source:                      r.Source,
+		ClientID:                    r.ClientID,
+		NamespaceID:                 normalizeNamespaceID(r.NamespaceID),
+		NamespacePath:               normalizeNamespacePath(r.NamespacePath),
+		MountAccessor:               strings.TrimSpace(r.MountAccessor),
+		MountPath:                   normalizeMountPath(r.MountPath),
+		MountType:                   strings.ToLower(strings.TrimSpace(r.MountType)),
+		AuthMethod:                  strings.ToLower(strings.TrimSpace(r.AuthMethod)),
+		ClientType:                  normalizeClientType(r.ClientType),
+		TokenCreationTime:           ParseTime(r.TokenCreationTime),
+		ClientFirstUsageTime:        ParseTime(r.ClientFirstUsageTime),
+		EntityAliasName:             strings.TrimSpace(r.EntityAliasName),
+		EntityAliasMetadataUsername: strings.TrimSpace(r.EntityAliasMetadataUsername),
 	}
 }
 
@@ -400,6 +402,122 @@ func DeduplicateByAliasForMethods(records []Record, groups [][]string) []Record
 	return out
 }
 
+// aliasKeyInFile is the deduplication key for per-file alias dedup. It includes
+// the source file so records from different files are never collapsed together.
+type aliasKeyInFile struct {
+	base      string
+	mountType string
+	source    string
+}
+
+// isOIDC reports whether r was authenticated via OIDC.
+func isOIDC(r Record) bool {
+	return r.MountType == "oidc" || r.AuthMethod == "oidc"
+}
+
+// effectiveAliasInFile returns the alias to use for per-file dedup. For OIDC
+// records, entity_alias_metadata.username holds the human-readable username;
+// entity_alias_name may be a subject identifier (UUID or email) that doesn't
+// match other methods. All other methods use entity_alias_name directly.
+func effectiveAliasInFile(r Record) string {
+	if isOIDC(r) && r.EntityAliasMetadataUsername != "" {
+		return r.EntityAliasMetadataUsername
+	}
+	return r.EntityAliasName
+}
+
+// aliasKeyInFileFor computes the per-file dedup key for a record. It applies
+// BaseAlias (strips everything after '@' if present) but not StripTierSuffix,
+// so "alice-t0" and "alice-t1" are treated as distinct identities. The '@'
+// strip is needed for JWT, which uses full email addresses ("alice@corp.com");
+// LDAP uses bare usernames; OIDC prefers entity_alias_metadata.username if set.
+// Returns false if MountType (or AuthMethod when MountType is blank) is not in groupMap.
+func aliasKeyInFileFor(r Record, groupMap map[string]string) (aliasKeyInFile, bool) {
+	mt := r.MountType
+	if mt == "" {
+		mt = r.AuthMethod
+	}
+	canonical, ok := groupMap[mt]
+	if !ok {
+		return aliasKeyInFile{}, false
+	}
+	return aliasKeyInFile{
+		base:      BaseAlias(effectiveAliasInFile(r)),
+		mountType: canonical,
+		source:    r.Source,
+	}, true
+}
+
+// FindAliasDuplicatesForMethodsPerFile is like FindAliasDuplicatesForMethods
+// but only groups records within the same source file; it never drops any.
+// Records with a blank effective alias, PKI clients, and methods outside every
+// group are skipped. Keys use the alias left of '@'; tier suffixes
+// (-t0/-t1/-t2) are not stripped. Only groups with 2+ members are returned.
+func FindAliasDuplicatesForMethodsPerFile(records []Record, groups [][]string) [][]Record {
+	groupMap := buildMethodGroupMap(groups)
+
+	type entry struct {
+		key     aliasKeyInFile
+		members []Record
+	}
+	index := make(map[aliasKeyInFile]int)
+	var entries []entry
+
+	for _, r := range records {
+		if effectiveAliasInFile(r) == "" || IsPKIClient(r) {
+			continue
+		}
+		kf, ok := aliasKeyInFileFor(r, groupMap)
+		if !ok {
+			continue
+		}
+		if idx, exists := index[kf]; exists {
+			entries[idx].members = append(entries[idx].members, r)
+		} else {
+			index[kf] = len(entries)
+			entries = append(entries, entry{key: kf, members: []Record{r}})
+		}
+	}
+
+	var out [][]Record
+	for _, e := range entries {
+		if len(e.members) > 1 {
+			out = append(out, e.members)
+		}
+	}
+	return out
+}
+
+// DeduplicateByAliasForMethodsPerFile applies alias dedup like
+// DeduplicateByAliasForMethods but scoped to each source file independently.
+// Records in different files are never collapsed; within a file, the first
+// record seen for each (alias, method group) key is kept and later ones dropped.
+// Matching uses only the portion of the alias left of '@'; tier suffixes
+// (-t0/-t1/-t2) are not stripped and must match exactly.
+// Records with a blank effective alias, PKI clients, or an ungrouped method are always kept.
+func DeduplicateByAliasForMethodsPerFile(records []Record, groups [][]string) []Record {
+	groupMap := buildMethodGroupMap(groups)
+	seen := make(map[aliasKeyInFile]struct{}, len(records))
+	out := make([]Record, 0, len(records))
+	for _, r := range records {
+		if effectiveAliasInFile(r) == "" || IsPKIClient(r) {
+			out = append(out, r)
+			continue
+		}
+		kf, ok := aliasKeyInFileFor(r, groupMap)
+		if !ok {
+			out = append(out, r)
+			continue
+		}
+		if _, dup := seen[kf]; dup {
+			continue
+		}
+		seen[kf] = struct{}{}
+		out = append(out, r)
+	}
+	return out
+}
+
 // isJWT reports whether r was authenticated via JWT.
 func isJWT(r Record) bool {
 	return r.MountType == "jwt" || r.AuthMethod == "jwt"
diff --git a/internal/parser/parser.go b/internal/parser/parser.go
index 98a1199..aaa29f9 100644
--- a/internal/parser/parser.go
+++ b/internal/parser/parser.go
@@ -17,17 +17,18 @@ type RawRecord struct {
 	// Source tracks which file this record came from.
 	Source string
 
-	ClientID             string
-	NamespaceID          string
-	NamespacePath        string
-	MountAccessor        string
-	MountPath            string
-	MountType            string
-	AuthMethod           string
-	ClientType           string
-	TokenCreationTime    string // may be populated from legacy "timestamp" column
-	ClientFirstUsageTime string
-	EntityAliasName      string
+	ClientID                    string
+	NamespaceID                 string
+	NamespacePath               string
+	MountAccessor               string
+	MountPath                   string
+	MountType                   string
+	AuthMethod                  string
+	ClientType                  string
+	TokenCreationTime           string // may be populated from legacy "timestamp" column
+	ClientFirstUsageTime        string
+	EntityAliasName             string
+	EntityAliasMetadataUsername string
 }
 
 // knownColumns maps all recognised (lowercased, trimmed) header variants to
@@ -43,7 +44,8 @@ var knownColumns = map[string]string{
 	"client_type":            "client_type",
 	"token_creation_time":    "token_creation_time",
 	"client_first_usage_time": "client_first_usage_time",
-	"entity_alias_name":      "entity_alias_name",
+	"entity_alias_name":                  "entity_alias_name",
+	"entity_alias_metadata.username":     "entity_alias_metadata_username",
 	// Legacy / alternative column names:
 	"timestamp":              "token_creation_time", // Vault < 1.17
 	"first_seen":             "client_first_usage_time",
@@ -123,18 +125,19 @@ func parseReader(r io.Reader, source string) ([]RawRecord, error) {
 		}
 
 		records = append(records, RawRecord{
-			Source:               source,
-			ClientID:             clientID,
-			NamespaceID:          get(row, "namespace_id"),
-			NamespacePath:        get(row, "namespace_path"),
-			MountAccessor:        get(row, "mount_accessor"),
-			MountPath:            get(row, "mount_path"),
-			MountType:            get(row, "mount_type"),
-			AuthMethod:           get(row, "auth_method"),
-			ClientType:           get(row, "client_type"),
-			TokenCreationTime:    get(row, "token_creation_time"),
-			ClientFirstUsageTime: get(row, "client_first_usage_time"),
-			EntityAliasName:      get(row, "entity_alias_name"),
+			Source:                      source,
+			ClientID:                    clientID,
+			NamespaceID:                 get(row, "namespace_id"),
+			NamespacePath:               get(row, "namespace_path"),
+			MountAccessor:               get(row, "mount_accessor"),
+			MountPath:                   get(row, "mount_path"),
+			MountType:                   get(row, "mount_type"),
+			AuthMethod:                  get(row, "auth_method"),
+			ClientType:                  get(row, "client_type"),
+			TokenCreationTime:           get(row, "token_creation_time"),
+			ClientFirstUsageTime:        get(row, "client_first_usage_time"),
+			EntityAliasName:             get(row, "entity_alias_name"),
+			EntityAliasMetadataUsername: get(row, "entity_alias_metadata_username"),
 		})
 	}
 
diff --git a/internal/renderer/renderer.go b/internal/renderer/renderer.go
index 2d788ff..b1e2659 100644
--- a/internal/renderer/renderer.go
+++ b/internal/renderer/renderer.go
@@ -27,16 +27,6 @@ var columns = []column{
 		width:  16,
 		get:    func(r normalizer.Record) string { return r.NamespacePath },
 	},
-	{
-		header: "Client Type",
-		width:  12,
-		get:    func(r normalizer.Record) string { return r.ClientType },
-	},
-	{
-		header: "Auth Method",
-		width:  12,
-		get:    func(r normalizer.Record) string { return r.AuthMethod },
-	},
 	{
 		header: "Mount Path",
 		width:  12,
@@ -74,24 +64,44 @@ var aliasColumn = column{
 	get:    func(r normalizer.Record) string { return r.EntityAliasName },
 }
 
+var oidcUsernameColumn = column{
+	header: "OIDC Username",
+	width:  13,
+	get:    func(r normalizer.Record) string { return r.EntityAliasMetadataUsername },
+}
+
 // PrintTable writes the records as a plain-text table to w. If any record has
 // a non-empty EntityAliasName, an Entity Alias column is appended so the
-// original alias values are visible in alias deduplication output.
+// original alias values are visible in alias deduplication output. If any
+// record has a non-empty EntityAliasMetadataUsername, an OIDC Username column
+// is also appended.
 func PrintTable(w io.Writer, records []normalizer.Record) {
 	if len(records) == 0 {
 		fmt.Fprintln(w, "(no records to display)")
 		return
 	}
 
-	// Build column list, appending the alias column only when the data has it.
+	// Build column list, appending extra columns only when the data has them.
 	cols := make([]column, len(columns))
 	copy(cols, columns)
+	var hasAlias, hasOIDCUsername bool
 	for _, r := range records {
 		if r.EntityAliasName != "" {
-			cols = append(cols, aliasColumn)
+			hasAlias = true
+		}
+		if r.EntityAliasMetadataUsername != "" {
+			hasOIDCUsername = true
+		}
+		if hasAlias && hasOIDCUsername {
 			break
 		}
 	}
+	if hasAlias {
+		cols = append(cols, aliasColumn)
+	}
+	if hasOIDCUsername {
+		cols = append(cols, oidcUsernameColumn)
+	}
 
 	for _, r := range records {
 		for i, c := range cols {
diff --git a/internal/renderer/renderer_test.go b/internal/renderer/renderer_test.go
index 383b91c..f775f60 100644
--- a/internal/renderer/renderer_test.go
+++ b/internal/renderer/renderer_test.go
@@ -46,8 +46,8 @@ func TestPrintTable_RendersRows(t *testing.T) {
 	if !strings.Contains(out, "Namespace Path") {
 		t.Error("expected header 'Namespace Path'")
 	}
-	if !strings.Contains(out, "Client Type") {
-		t.Error("expected header 'Client Type'")
+	if strings.Contains(out, "Client Type") {
+		t.Error("unexpected header 'Client Type' — removed from table output")
 	}
 
 	// Data rows present
@@ -57,9 +57,6 @@ func TestPrintTable_RendersRows(t *testing.T) {
 	if !strings.Contains(out, "education/") {
 		t.Error("expected namespace 'education/'")
 	}
-	if !strings.Contains(out, "non-entity") {
-		t.Error("expected client type 'non-entity'")
-	}
 }
 
 func TestPrintTable_ZeroTimeFmtDash(t *testing.T) {

From 0de034f567c5aa0fe7b366a9a22552bf108e51af Mon Sep 17 00:00:00 2001
From: Andrew Thielen <andrew.thielen@hashicorp.com>
Date: Thu, 21 May 2026 16:21:21 -0500
Subject: [PATCH 2/8] Add abandoned record filtering

---
 README.md                              | 14 ++++
 cmd/vault-csv-normalizer/main.go       | 15 ++++
 internal/normalizer/normalizer.go      | 96 ++++++++++++++++++--------
 internal/normalizer/normalizer_test.go | 88 ++++++++++++++++-------
 internal/parser/parser.go              |  3 +
 internal/parser/parser_test.go         |  8 ++-
 6 files changed, 166 insertions(+), 58 deletions(-)

diff --git a/README.md b/README.md
index ff8f84c..66c0b46 100644
--- a/README.md
+++ b/README.md
@@ -111,11 +111,19 @@ OPTIONS:
         counted twice when they authenticate via both LDAP/OIDC and JWT.
         Records without an alias are always kept. May be combined with
         --dedup-alias, --dedup-methods, and/or -d.
+  -remove-abandoned-clients
+        Remove abandoned clients where entity_name and entity_alias_name are
+        both blank. This includes records with no auth mount (mount_path
+        empty) and merged/deleted entities (mount_path present). Applied after
+        all deduplication steps.
   -per-file
         Print a summary for each input file before the combined summary
   -debug
         Print all records grouped by mount path, with a full record table under
         each mount. Records with no mount path are grouped as "(no mount)".
+        Also prints how many records were removed by
+        --remove-abandoned-clients when that flag is enabled, split into
+        no-mount and merged/deleted buckets.
   -help
         Show usage information
 ```
@@ -178,6 +186,12 @@ vault-csv-normalizer -f export.csv --dedup-jwt
 # Full dedup: collapse tiers, dedup client_ids, then drop redundant JWT records
 vault-csv-normalizer -f jan.csv feb.csv --dedup-alias -d --dedup-jwt
 
+# Remove abandoned clients from final totals
+vault-csv-normalizer -f export.csv --remove-abandoned-clients
+
+# Same as above, with debug count output for removed rows
+vault-csv-normalizer -f export.csv --remove-abandoned-clients --debug
+
 # Deduplicate LDAP and OIDC as one identity group — same person via either
 # method is counted once; other auth methods are unaffected
 vault-csv-normalizer -f export.csv --dedup-methods ldap,oidc
diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index 97b64bb..75ebddd 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -50,6 +50,7 @@ func main() {
 	var dedup bool
 	var dedupAlias bool
 	var dedupJWT bool
+	var removeAbandonedClients bool
 	var listMethods bool
 	var debugMode bool
 	var perFile bool
@@ -67,6 +68,7 @@ func main() {
 	flag.Var(&dedupMethods, "dedup-methods", "Deduplicate by alias for the specified comma-separated auth methods, treating them as one identity group. Repeatable to define multiple groups (e.g. -dedup-methods ldap,oidc -dedup-methods jwt,saml).")
 	flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Like --dedup-methods but scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.")
 	flag.BoolVar(&dedupJWT, "dedup-jwt", false, "Drop JWT records whose normalized alias matches a non-JWT record in the same file (prevents counting the same person via both LDAP/OIDC and JWT)")
+	flag.BoolVar(&removeAbandonedClients, "remove-abandoned-clients", false, "Remove abandoned clients (blank entity_name and entity_alias_name) after deduplication. Includes records with no auth mount and merged/deleted entities.")
 	flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods groups.")
 	flag.BoolVar(&debugMode, "debug", false, "Print all records grouped by mount path")
 	flag.BoolVar(&perFile, "per-file", false, "Print a summary for each input file before the combined summary")
@@ -224,6 +226,16 @@ func main() {
 		normalized = normalizer.DeduplicateJWT(normalized)
 	}
 
+	removedAbandonedCounts := normalizer.AbandonedClientCounts{}
+	if removeAbandonedClients {
+		normalized, removedAbandonedCounts = normalizer.FilterAbandonedClients(normalized)
+
+		fmt.Fprintf(os.Stdout, "Removed abandoned clients (total): %d\n", removedAbandonedCounts.Total())
+		fmt.Fprintf(os.Stdout, "  no auth mount (mount path empty): %d\n", removedAbandonedCounts.NoMount)
+		fmt.Fprintf(os.Stdout, "  merged/deleted (mount path present): %d\n", removedAbandonedCounts.MergedDeleted)
+		fmt.Fprintln(os.Stdout, strings.Repeat("-", 70))
+	}
+
 	// Apply filters.
 	if filterNS != "" {
 		normalized = normalizer.FilterByNamespace(normalized, filterNS)
@@ -445,6 +457,9 @@ EXAMPLES:
   # Per-file since filters on multiple files
   vault-csv-normalizer -f jan.csv feb.csv --since-file jan.csv=2024-01-15 --since-file feb.csv=2024-02-01
 
+  # Remove abandoned clients (blank entity fields)
+  vault-csv-normalizer -f export.csv --remove-abandoned-clients
+
 CSV FORMAT (Vault activity export):
   Expected columns (order-independent, case-insensitive):
     client_id, namespace_id, namespace_path, mount_accessor, mount_path,
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index efd20fd..8a6d573 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -14,15 +14,16 @@ import (
 
 // Record is a fully normalized Vault client record.
 type Record struct {
-	Source               string
-	ClientID             string
-	NamespaceID          string
-	NamespacePath        string
-	MountAccessor        string
-	MountPath            string
-	MountType            string
-	AuthMethod           string
-	ClientType           string // normalized: entity | non-entity | acme | secret-sync | unknown
+	Source                      string
+	ClientID                    string
+	EntityName                  string
+	NamespaceID                 string
+	NamespacePath               string
+	MountAccessor               string
+	MountPath                   string
+	MountType                   string
+	AuthMethod                  string
+	ClientType                  string // normalized: entity | non-entity | acme | secret-sync | unknown
 	TokenCreationTime           time.Time
 	ClientFirstUsageTime        time.Time
 	EntityAliasName             string
@@ -31,14 +32,14 @@ type Record struct {
 
 // supportedSortKeys lists columns accepted by Sort.
 var supportedSortKeys = map[string]bool{
-	"namespace_path":         true,
-	"client_type":            true,
-	"token_creation_time":    true,
+	"namespace_path":          true,
+	"client_type":             true,
+	"token_creation_time":     true,
 	"client_first_usage_time": true,
-	"mount_accessor":         true,
-	"mount_path":             true,
-	"auth_method":            true,
-	"source":                 true,
+	"mount_accessor":          true,
+	"mount_path":              true,
+	"auth_method":             true,
+	"source":                  true,
 }
 
 // Normalize converts a slice of raw records into normalized records.
@@ -54,6 +55,7 @@ func normalizeOne(r parser.RawRecord) Record {
 	return Record{
 		Source:                      r.Source,
 		ClientID:                    r.ClientID,
+		EntityName:                  strings.TrimSpace(r.EntityName),
 		NamespaceID:                 normalizeNamespaceID(r.NamespaceID),
 		NamespacePath:               normalizeNamespacePath(r.NamespacePath),
 		MountAccessor:               strings.TrimSpace(r.MountAccessor),
@@ -103,20 +105,20 @@ func normalizeMountPath(path string) string {
 
 // clientTypeAliases maps various raw strings to a canonical client type.
 var clientTypeAliases = map[string]string{
-	"entity":                     "entity",
-	"entity client":              "entity",
-	"non-entity":                 "non-entity",
-	"non_entity":                 "non-entity",
-	"non-entity client":          "non-entity",
-	"non_entity_client":          "non-entity",
-	"nonentity":                  "non-entity",
-	"acme":                       "acme",
-	"acme client":                "acme",
-	"secret-sync":                "secret-sync",
-	"secret_sync":                "secret-sync",
-	"secretsync":                 "secret-sync",
-	"secrets sync":               "secret-sync",
-	"secret sync":                "secret-sync",
+	"entity":            "entity",
+	"entity client":     "entity",
+	"non-entity":        "non-entity",
+	"non_entity":        "non-entity",
+	"non-entity client": "non-entity",
+	"non_entity_client": "non-entity",
+	"nonentity":         "non-entity",
+	"acme":              "acme",
+	"acme client":       "acme",
+	"secret-sync":       "secret-sync",
+	"secret_sync":       "secret-sync",
+	"secretsync":        "secret-sync",
+	"secrets sync":      "secret-sync",
+	"secret sync":       "secret-sync",
 }
 
 func normalizeClientType(raw string) string {
@@ -636,6 +638,40 @@ func FilterByClientType(records []Record, clientType string) []Record {
 	return out
 }
 
+// AbandonedClientCounts reports how many anonymous records were removed by
+// FilterAbandonedClients, split by whether an auth mount is present.
+type AbandonedClientCounts struct {
+	NoMount       int
+	MergedDeleted int
+}
+
+// Total returns the sum of removed abandoned-client records.
+func (c AbandonedClientCounts) Total() int {
+	return c.NoMount + c.MergedDeleted
+}
+
+// FilterAbandonedClients removes records with no entity identity (both
+// entity_name and entity_alias_name are blank) and reports separate counts for
+// two cases:
+//   - NoMount: mount_path is blank (auth mount no longer exists)
+//   - MergedDeleted: mount_path is present (entity was likely merged/deleted)
+func FilterAbandonedClients(records []Record) ([]Record, AbandonedClientCounts) {
+	out := make([]Record, 0, len(records))
+	counts := AbandonedClientCounts{}
+	for _, r := range records {
+		if r.EntityName == "" && r.EntityAliasName == "" {
+			if r.MountPath == "" {
+				counts.NoMount++
+				continue
+			}
+			counts.MergedDeleted++
+			continue
+		}
+		out = append(out, r)
+	}
+	return out, counts
+}
+
 // Sort sorts records in-place by the given column key. Returns an error if
 // the key is not recognized.
 func Sort(records []Record, by string) error {
diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go
index 68adaa1..18a919b 100644
--- a/internal/normalizer/normalizer_test.go
+++ b/internal/normalizer/normalizer_test.go
@@ -1,6 +1,7 @@
 package normalizer
 
 import (
+	"strings"
 	"testing"
 	"time"
 
@@ -73,15 +74,16 @@ func TestParseTime(t *testing.T) {
 func TestNormalize(t *testing.T) {
 	raw := []parser.RawRecord{
 		{
-			Source:             "jan.csv",
-			ClientID:           "abc-123",
-			NamespaceID:        "",
-			NamespacePath:      "root",
-			MountPath:          "auth/approle",
-			MountType:          "APPROLE",
-			AuthMethod:         "AppRole",
-			ClientType:         "non_entity",
-			TokenCreationTime:  "2024-01-01T00:00:00Z",
+			Source:            "jan.csv",
+			ClientID:          "abc-123",
+			EntityName:        "  Alice Smith  ",
+			NamespaceID:       "",
+			NamespacePath:     "root",
+			MountPath:         "auth/approle",
+			MountType:         "APPROLE",
+			AuthMethod:        "AppRole",
+			ClientType:        "non_entity",
+			TokenCreationTime: "2024-01-01T00:00:00Z",
 		},
 	}
 	records := Normalize(raw)
@@ -89,6 +91,9 @@ func TestNormalize(t *testing.T) {
 		t.Fatalf("expected 1 record, got %d", len(records))
 	}
 	r := records[0]
+	if r.EntityName != "Alice Smith" {
+		t.Errorf("EntityName: got %q, want Alice Smith", r.EntityName)
+	}
 	if r.NamespacePath != "[root]" {
 		t.Errorf("NamespacePath: got %q, want [root]", r.NamespacePath)
 	}
@@ -139,6 +144,40 @@ func TestFilterByClientType(t *testing.T) {
 	}
 }
 
+func TestFilterAbandonedClients(t *testing.T) {
+	records := []Record{
+		// removed as merged/deleted: mount path present
+		{ClientID: "drop-merged-1", EntityName: "", EntityAliasName: "", MountPath: "auth/ldap/", MountType: "ldap"},
+		// removed as merged/deleted: mount path present even if mount type is blank
+		{ClientID: "drop-merged-2", EntityName: "", EntityAliasName: "", MountPath: "auth/oidc/", MountType: ""},
+		// removed as no mount: mount path missing
+		{ClientID: "drop-nomount-1", EntityName: "", EntityAliasName: "", MountPath: "", MountType: "ldap"},
+		// keep: entity name present
+		{ClientID: "keep-3", EntityName: "Alice", EntityAliasName: "", MountPath: "auth/ldap/", MountType: "ldap"},
+		// keep: entity alias present
+		{ClientID: "keep-4", EntityName: "", EntityAliasName: "alice", MountPath: "auth/ldap/", MountType: "ldap"},
+	}
+
+	out, counts := FilterAbandonedClients(records)
+	if counts.NoMount != 1 {
+		t.Fatalf("expected NoMount=1, got %d", counts.NoMount)
+	}
+	if counts.MergedDeleted != 2 {
+		t.Fatalf("expected MergedDeleted=2, got %d", counts.MergedDeleted)
+	}
+	if counts.Total() != 3 {
+		t.Fatalf("expected Total=3, got %d", counts.Total())
+	}
+	if len(out) != 2 {
+		t.Fatalf("expected 2 records after filter, got %d", len(out))
+	}
+	for _, r := range out {
+		if strings.HasPrefix(r.ClientID, "drop-") {
+			t.Fatal("drop-* records should have been removed")
+		}
+	}
+}
+
 func TestDeduplicate_PrefersNonEmptyMount(t *testing.T) {
 	records := []Record{
 		{ClientID: "abc", MountPath: ""},
@@ -246,8 +285,8 @@ func TestStripTierSuffix(t *testing.T) {
 		{"alice-t10", "alice-t10"},
 		{"alice-T0", "alice-T0"}, // case-sensitive
 		{"alice", "alice"},
-		{"-t0", ""},   // degenerate: only the suffix
-		{"t0", "t0"},  // no hyphen
+		{"-t0", ""},  // degenerate: only the suffix
+		{"t0", "t0"}, // no hyphen
 		{"", ""},
 	}
 	for _, c := range cases {
@@ -285,7 +324,7 @@ func TestDeduplicateByAlias_CollapsesSameBaseAcrossAccessors(t *testing.T) {
 		{ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},           // dup: tier stripped → "sbishop"
 		{ClientID: "4", EntityAliasName: "sbishop-t1", MountAccessor: "auth_oidc_xyz789", Source: "jan.csv"},           // dup: tier stripped → "sbishop"
 		{ClientID: "5", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"},              // dup: same normalized alias across files
-		{ClientID: "6", EntityAliasName: ""},                                                                            // kept: blank always kept
+		{ClientID: "6", EntityAliasName: ""}, // kept: blank always kept
 	}
 	out := DeduplicateByAlias(records)
 	if len(out) != 2 {
@@ -324,7 +363,7 @@ func TestFindAliasDuplicates_SameBaseAcrossAccessors(t *testing.T) {
 		{ClientID: "2", EntityAliasName: "sbishop@hashicorp.com", MountAccessor: "auth_jwt_def456", Source: "jan.csv"},
 		{ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},
 		{ClientID: "4", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"}, // cross-file dup
-		{ClientID: "5", EntityAliasName: ""},                                                               // ignored
+		{ClientID: "5", EntityAliasName: ""}, // ignored
 	}
 	groups := FindAliasDuplicates(records)
 	if len(groups) != 1 {
@@ -358,11 +397,11 @@ func TestDeduplicateByAlias_IgnoresPKIClients(t *testing.T) {
 	// PKI clients are always kept regardless of alias duplication.
 	// Non-PKI clients with the same base alias in the same file are deduplicated.
 	records := []Record{
-		{ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"},  // PKI, kept
-		{ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"},  // PKI, kept (not deduped)
+		{ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"},             // PKI, kept
+		{ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"},             // PKI, kept (not deduped)
 		{ClientID: "3", EntityAliasName: "abc-789", MountAccessor: "auth_cert_xyz", Source: "jan.csv"}, // cert auth — PKI, kept
-		{ClientID: "4", EntityAliasName: "alice@corp", Source: "jan.csv"},                   // non-PKI, first: kept
-		{ClientID: "5", EntityAliasName: "alice@example.com", Source: "jan.csv"},            // non-PKI dup: base "alice" already seen, dropped
+		{ClientID: "4", EntityAliasName: "alice@corp", Source: "jan.csv"},                              // non-PKI, first: kept
+		{ClientID: "5", EntityAliasName: "alice@example.com", Source: "jan.csv"},                       // non-PKI dup: base "alice" already seen, dropped
 	}
 	out := DeduplicateByAlias(records)
 	if len(out) != 4 {
@@ -547,7 +586,6 @@ func TestPartitionPKI_NoPKI(t *testing.T) {
 	}
 }
 
-
 func TestPartitionPKI_Empty(t *testing.T) {
 	pki, nonPKI := PartitionPKI(nil, IsPKIClient)
 	if pki != nil || nonPKI != nil {
@@ -567,8 +605,8 @@ func TestFilterSincePerSource_FiltersTargetFileOnly(t *testing.T) {
 	records := []Record{
 		// jan.csv: one record before cutoff, one after
 		{ClientID: "j1", Source: "jan.csv", TokenCreationTime: jan15.Add(-24 * time.Hour)}, // before — excluded
-		{ClientID: "j2", Source: "jan.csv", TokenCreationTime: jan15},                       // on cutoff — kept
-		{ClientID: "j3", Source: "jan.csv", TokenCreationTime: jan20},                       // after — kept
+		{ClientID: "j2", Source: "jan.csv", TokenCreationTime: jan15},                      // on cutoff — kept
+		{ClientID: "j3", Source: "jan.csv", TokenCreationTime: jan20},                      // after — kept
 		// feb.csv: not in filter map — all kept regardless of date
 		{ClientID: "f1", Source: "feb.csv", TokenCreationTime: jan15.Add(-24 * time.Hour)}, // old but kept
 		{ClientID: "f2", Source: "feb.csv", TokenCreationTime: feb01},
@@ -813,10 +851,10 @@ func TestDeduplicateByAlias_CollapseOIDCWithLDAP(t *testing.T) {
 	// JWT remains a separate group and is not collapsed here.
 	records := []Record{
 		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"},  // dup: ldap/oidc group, normalizes to "alice"
-		{ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"},        // dup: ldap/oidc group, tier stripped → "alice"
-		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},   // kept: jwt is a separate group
-		{ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"},             // kept: different alias
+		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dup: ldap/oidc group, normalizes to "alice"
+		{ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"},       // dup: ldap/oidc group, tier stripped → "alice"
+		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},  // kept: jwt is a separate group
+		{ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"},            // kept: different alias
 	}
 	out := DeduplicateByAlias(records)
 	if len(out) != 3 {
@@ -843,7 +881,7 @@ func TestDeduplicateByAlias_ScopedToMountType(t *testing.T) {
 	// → they ARE collapsed.
 	records := []Record{
 		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"},    // dup: same type + base
+		{ClientID: "2", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"},      // dup: same type + base
 		{ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: different mount type
 	}
 	out := DeduplicateByAlias(records)
diff --git a/internal/parser/parser.go b/internal/parser/parser.go
index aaa29f9..394b757 100644
--- a/internal/parser/parser.go
+++ b/internal/parser/parser.go
@@ -18,6 +18,7 @@ type RawRecord struct {
 	Source string
 
 	ClientID                    string
+	EntityName                  string
 	NamespaceID                 string
 	NamespacePath               string
 	MountAccessor               string
@@ -35,6 +36,7 @@ type RawRecord struct {
 // a canonical field name used by the column mapper below.
 var knownColumns = map[string]string{
 	"client_id":              "client_id",
+	"entity_name":            "entity_name",
 	"namespace_id":           "namespace_id",
 	"namespace_path":         "namespace_path",
 	"mount_accessor":         "mount_accessor",
@@ -127,6 +129,7 @@ func parseReader(r io.Reader, source string) ([]RawRecord, error) {
 		records = append(records, RawRecord{
 			Source:                      source,
 			ClientID:                    clientID,
+			EntityName:                  get(row, "entity_name"),
 			NamespaceID:                 get(row, "namespace_id"),
 			NamespacePath:               get(row, "namespace_path"),
 			MountAccessor:               get(row, "mount_accessor"),
diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go
index e1a1f02..21d7e93 100644
--- a/internal/parser/parser_test.go
+++ b/internal/parser/parser_test.go
@@ -6,9 +6,9 @@ import (
 )
 
 func TestParseReader_StandardColumns(t *testing.T) {
-	csv := `client_id,namespace_id,namespace_path,mount_accessor,mount_path,mount_type,auth_method,client_type,token_creation_time,client_first_usage_time
-abc-123,root,[root],auth_approle_abc,auth/approle/,approle,approle,entity,2024-01-15T10:00:00Z,2024-01-15T12:00:00Z
-def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T08:00:00Z,
+	csv := `client_id,entity_name,namespace_id,namespace_path,mount_accessor,mount_path,mount_type,auth_method,client_type,token_creation_time,client_first_usage_time
+abc-123,Alice Smith,root,[root],auth_approle_abc,auth/approle/,approle,approle,entity,2024-01-15T10:00:00Z,2024-01-15T12:00:00Z
+def-456,,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T08:00:00Z,
 `
 	records, err := parseReader(strings.NewReader(csv), "test.csv")
 	if err != nil {
@@ -20,6 +20,7 @@ def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T
 
 	r := records[0]
 	assertEqual(t, "client_id", "abc-123", r.ClientID)
+	assertEqual(t, "entity_name", "Alice Smith", r.EntityName)
 	assertEqual(t, "namespace_id", "root", r.NamespaceID)
 	assertEqual(t, "namespace_path", "[root]", r.NamespacePath)
 	assertEqual(t, "mount_accessor", "auth_approle_abc", r.MountAccessor)
@@ -30,6 +31,7 @@ def-456,ns1,education/,auth_ldap_xyz,auth/ldap/,ldap,ldap,non-entity,2024-02-01T
 	assertEqual(t, "client_first_usage_time", "2024-01-15T12:00:00Z", r.ClientFirstUsageTime)
 
 	r2 := records[1]
+	assertEqual(t, "entity_name_empty", "", r2.EntityName)
 	assertEqual(t, "client_first_usage_time_empty", "", r2.ClientFirstUsageTime)
 }
 

From ee2570eab5ae8326a9f481c004c15244de830837 Mon Sep 17 00:00:00 2001
From: Andrew Thielen <andrew.thielen@hashicorp.com>
Date: Thu, 21 May 2026 16:35:32 -0500
Subject: [PATCH 3/8] Only filter entity clients

---
 internal/normalizer/normalizer.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index 8a6d573..315fac5 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -659,7 +659,7 @@ func FilterAbandonedClients(records []Record) ([]Record, AbandonedClientCounts)
 	out := make([]Record, 0, len(records))
 	counts := AbandonedClientCounts{}
 	for _, r := range records {
-		if r.EntityName == "" && r.EntityAliasName == "" {
+		if r.EntityName == "" && r.EntityAliasName == "" && r.ClientType == "entity" {
 			if r.MountPath == "" {
 				counts.NoMount++
 				continue

From 3a2e584ceb3a6471b6ae13512af0b32750e3efbe Mon Sep 17 00:00:00 2001
From: Andrew Thielen <andrew.thielen@hashicorp.com>
Date: Thu, 21 May 2026 17:14:54 -0500
Subject: [PATCH 4/8] Break down abandoned clients by category

---
 cmd/vault-csv-normalizer/main.go       |  6 ++++--
 internal/normalizer/normalizer.go      | 16 +++++++++++++---
 internal/normalizer/normalizer_test.go | 22 ++++++++++++++++------
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index 75ebddd..93d9845 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -231,8 +231,10 @@ func main() {
 		normalized, removedAbandonedCounts = normalizer.FilterAbandonedClients(normalized)
 
 		fmt.Fprintf(os.Stdout, "Removed abandoned clients (total): %d\n", removedAbandonedCounts.Total())
-		fmt.Fprintf(os.Stdout, "  no auth mount (mount path empty): %d\n", removedAbandonedCounts.NoMount)
-		fmt.Fprintf(os.Stdout, "  merged/deleted (mount path present): %d\n", removedAbandonedCounts.MergedDeleted)
+		fmt.Fprintf(os.Stdout, "  no auth mount (mount path empty): %d  (PKI: %d, non-PKI: %d)\n",
+			removedAbandonedCounts.NoMount, removedAbandonedCounts.NoMountPKI, removedAbandonedCounts.NoMount-removedAbandonedCounts.NoMountPKI)
+		fmt.Fprintf(os.Stdout, "  merged/deleted (mount path present): %d  (PKI: %d, non-PKI: %d)\n",
+			removedAbandonedCounts.MergedDeleted, removedAbandonedCounts.MergedDeletedPKI, removedAbandonedCounts.MergedDeleted-removedAbandonedCounts.MergedDeletedPKI)
 		fmt.Fprintln(os.Stdout, strings.Repeat("-", 70))
 	}
 
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index 315fac5..df9a2d6 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -639,10 +639,13 @@ func FilterByClientType(records []Record, clientType string) []Record {
 }
 
 // AbandonedClientCounts reports how many anonymous records were removed by
-// FilterAbandonedClients, split by whether an auth mount is present.
+// FilterAbandonedClients, split by whether an auth mount is present and
+// whether the record is a PKI client.
 type AbandonedClientCounts struct {
-	NoMount       int
-	MergedDeleted int
+	NoMount          int
+	NoMountPKI       int
+	MergedDeleted    int
+	MergedDeletedPKI int
 }
 
 // Total returns the sum of removed abandoned-client records.
@@ -660,11 +663,18 @@ func FilterAbandonedClients(records []Record) ([]Record, AbandonedClientCounts)
 	counts := AbandonedClientCounts{}
 	for _, r := range records {
 		if r.EntityName == "" && r.EntityAliasName == "" && r.ClientType == "entity" {
+			pki := IsPKIClient(r)
 			if r.MountPath == "" {
 				counts.NoMount++
+				if pki {
+					counts.NoMountPKI++
+				}
 				continue
 			}
 			counts.MergedDeleted++
+			if pki {
+				counts.MergedDeletedPKI++
+			}
 			continue
 		}
 		out = append(out, r)
diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go
index 18a919b..e25fd5a 100644
--- a/internal/normalizer/normalizer_test.go
+++ b/internal/normalizer/normalizer_test.go
@@ -152,6 +152,10 @@ func TestFilterAbandonedClients(t *testing.T) {
 		{ClientID: "drop-merged-2", EntityName: "", EntityAliasName: "", MountPath: "auth/oidc/", MountType: ""},
 		// removed as no mount: mount path missing
 		{ClientID: "drop-nomount-1", EntityName: "", EntityAliasName: "", MountPath: "", MountType: "ldap"},
+		// removed as merged/deleted PKI (auth_cert accessor, mount present)
+		{ClientID: "drop-merged-pki-1", EntityName: "", EntityAliasName: "", MountPath: "auth/cert/", MountType: "cert", MountAccessor: "auth_cert_abc123"},
+		// removed as no-mount PKI (auth_cert accessor, mount missing)
+		{ClientID: "drop-nomount-pki-1", EntityName: "", EntityAliasName: "", MountPath: "", MountType: "cert", MountAccessor: "auth_cert_xyz789"},
 		// keep: entity name present
 		{ClientID: "keep-3", EntityName: "Alice", EntityAliasName: "", MountPath: "auth/ldap/", MountType: "ldap"},
 		// keep: entity alias present
@@ -159,14 +163,20 @@ func TestFilterAbandonedClients(t *testing.T) {
 	}
 
 	out, counts := FilterAbandonedClients(records)
-	if counts.NoMount != 1 {
-		t.Fatalf("expected NoMount=1, got %d", counts.NoMount)
+	if counts.NoMount != 2 {
+		t.Fatalf("expected NoMount=2, got %d", counts.NoMount)
 	}
-	if counts.MergedDeleted != 2 {
-		t.Fatalf("expected MergedDeleted=2, got %d", counts.MergedDeleted)
+	if counts.NoMountPKI != 1 {
+		t.Fatalf("expected NoMountPKI=1, got %d", counts.NoMountPKI)
 	}
-	if counts.Total() != 3 {
-		t.Fatalf("expected Total=3, got %d", counts.Total())
+	if counts.MergedDeleted != 3 {
+		t.Fatalf("expected MergedDeleted=3, got %d", counts.MergedDeleted)
+	}
+	if counts.MergedDeletedPKI != 1 {
+		t.Fatalf("expected MergedDeletedPKI=1, got %d", counts.MergedDeletedPKI)
+	}
+	if counts.Total() != 5 {
+		t.Fatalf("expected Total=5, got %d", counts.Total())
 	}
 	if len(out) != 2 {
 		t.Fatalf("expected 2 records after filter, got %d", len(out))

From ae5d444d9b48e064ecb89fe16d07f1f067e8b6b6 Mon Sep 17 00:00:00 2001
From: Schuyler Bishop <schuyler@ibm.com>
Date: Tue, 26 May 2026 18:36:51 -0500
Subject: [PATCH 5/8] Cleaning out options we don't want to present to
 customers

---
 .gitignore                             |   1 +
 README.md                              | 139 ++----
 cmd/vault-csv-normalizer/main.go       | 210 ++-------
 internal/normalizer/normalizer.go      | 248 +----------
 internal/normalizer/normalizer_test.go | 565 +------------------------
 5 files changed, 73 insertions(+), 1090 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7a00b69..f0f15bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 # Claude
 .claude/
 CLAUDE.md
+CLAUDE.local.md
 
 # Test data outputs
 testdata/out.csv
diff --git a/README.md b/README.md
index 66c0b46..606ce87 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ versions), and displays a summary of client counts by mount path and type.
 - Normalizes **namespace paths** (empty/`root` → `[root]`, ensures trailing `/`)
 - Normalizes **mount paths** (ensures trailing `/`)
 - Normalizes **timestamps** to UTC across all common Vault timestamp formats
-- **Deduplicates** clients across files by `client_id` when `-d` is set, by normalized `entity_alias_name` (`--dedup-alias`), or by alias within explicit auth-method groups (`--dedup-methods ldap,oidc`); alias normalization strips domain suffixes (`@corp.com`) and tier suffixes (`-t0`/`-t1`/`-t2`)
+- **Deduplicates** clients within each file by alias within explicit auth-method groups (`--dedup-methods-per-file ldap,oidc`); alias normalization strips domain suffixes (`@corp.com`)
 - **Filters** by namespace (substring) or client type
 - **Sorts** by any column
 - Prints a **summary** with counts broken down by mount path and client type
@@ -66,51 +66,31 @@ OPTIONS:
         Apply a since filter to one specific file only. May be specified
         multiple times for different files. The filename is matched against
         the base name (e.g. jan.csv=2024-01-15).
-  -d    Deduplicate records by client_id across all input files.
-  -dedup-alias
-        Deduplicate by entity_alias_name within the same identity group across
-        all input files. LDAP and OIDC are treated as one group (the same
-        person typically has the same username in both). Two records are
-        considered the same client if they share the same normalized alias AND
-        belong to the same identity group, regardless of mount accessor or
-        source file. Normalization strips the domain suffix (at '@') and any
-        trailing tier suffix (-t0, -t1, -t2), so "sbishop" (LDAP), "sbishop-t0"
-        (LDAP, another file), and "sbishop@corp.com" (OIDC) → one client.
-        JWT is a separate group and is not collapsed here; use --dedup-jwt for
-        JWT vs LDAP/OIDC dedup.
-        Duplicate groups are printed as a table before the summary.
-        Records without an alias are always kept. May be combined with -d.
-  -dedup-methods method1,method2,...
-        Apply alias deduplication (same normalization as --dedup-alias) but
-        only for records whose auth method appears in the specified
-        comma-separated group. Methods in the same group are treated as one
-        identity — a person authenticating via any of them is counted once.
-        Records whose auth method is not in any group pass through unchanged.
+  -dedup-methods-per-file method1,method2,...
+        Deduplicate by alias for records whose auth method appears in the
+        specified comma-separated group, scoped to each input file
+        independently. Records in different files with the same alias are NOT
+        collapsed — only within-file duplicates are removed. Normalization
+        strips domain suffixes (at '@') only; tier suffixes (-t0/-t1/-t2) are
+        kept. Records whose auth method is not in any group pass through
+        unchanged.
 
         The flag is repeatable; each use defines one independent group:
 
-          -dedup-methods ldap,oidc
-              Deduplicate LDAP and OIDC as one identity group. "alice" (LDAP),
-              "alice@corp.com" (OIDC), and "alice-t0" (LDAP) all normalize to
-              "alice" and are counted once. JWT records are unaffected.
-
-          -dedup-methods ldap,oidc,jwt
-              Treat LDAP, OIDC, and JWT together as one group.
-
-          -dedup-methods ldap,oidc -dedup-methods jwt,saml
-              Two independent groups: {ldap,oidc} and {jwt,saml}. Records in
-              different groups are never collapsed against each other.
-
-        Duplicate groups are printed as a table before the summary (same
-        format as --dedup-alias). Records without an alias and PKI clients are
-        always kept. May be combined with --dedup-alias, --dedup-jwt, and/or -d.
-  -dedup-jwt
-        Drop JWT records whose normalized alias matches a non-JWT record across
-        any input file. Uses the same normalization as --dedup-alias (strips
-        '@domain' and '-t0'/'-t1'/'-t2'). Prevents the same person from being
-        counted twice when they authenticate via both LDAP/OIDC and JWT.
-        Records without an alias are always kept. May be combined with
-        --dedup-alias, --dedup-methods, and/or -d.
+          -dedup-methods-per-file ldap,oidc
+              Within each file, collapse LDAP and OIDC records that share the
+              same alias. "alice" (LDAP) and "alice@corp.com" (OIDC) in the
+              same file normalize to "alice" and are counted once. A user in
+              jan.csv and feb.csv is NOT collapsed — counted once per file.
+
+          -dedup-methods-per-file ldap,oidc,jwt
+              Treat LDAP, OIDC, and JWT as one group within each file.
+
+          -dedup-methods-per-file ldap,oidc -dedup-methods-per-file jwt,saml
+              Two independent per-file groups.
+
+        Duplicate groups are printed as a table before the summary. Records
+        without an alias and PKI clients are always kept.
   -remove-abandoned-clients
         Remove abandoned clients where entity_name and entity_alias_name are
         both blank. This includes records with no auth mount (mount_path
@@ -169,41 +149,20 @@ vault-csv-normalizer -f jan.csv feb.csv --per-file
 # Debug: show all records grouped by mount path
 vault-csv-normalizer -f export.csv --debug
 
-# Deduplicate client_ids across files
-vault-csv-normalizer -f jan.csv feb.csv -d
-
-# Deduplicate by entity alias — strips domain (@corp.com) and tier (-t0/-t1/-t2)
-# "alice", "alice-t0", "alice-t1", "alice@corp.com" → counted as one client per file
-vault-csv-normalizer -f jan.csv feb.csv --dedup-alias
-
-# Combine both: alias dedup collapses tier/domain variants within each file,
-# then -d deduplicates the same client_id appearing across multiple files
-vault-csv-normalizer -f jan.csv feb.csv --dedup-alias -d
-
-# Drop JWT records where the same person already appears via LDAP or OIDC
-vault-csv-normalizer -f export.csv --dedup-jwt
-
-# Full dedup: collapse tiers, dedup client_ids, then drop redundant JWT records
-vault-csv-normalizer -f jan.csv feb.csv --dedup-alias -d --dedup-jwt
-
 # Remove abandoned clients from final totals
 vault-csv-normalizer -f export.csv --remove-abandoned-clients
 
 # Same as above, with debug count output for removed rows
 vault-csv-normalizer -f export.csv --remove-abandoned-clients --debug
 
-# Deduplicate LDAP and OIDC as one identity group — same person via either
-# method is counted once; other auth methods are unaffected
-vault-csv-normalizer -f export.csv --dedup-methods ldap,oidc
+# Within each file, collapse LDAP and OIDC records with the same alias
+vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file ldap,oidc
 
-# Treat LDAP, OIDC, and JWT together as one human-identity group
-vault-csv-normalizer -f export.csv --dedup-methods ldap,oidc,jwt
+# Treat LDAP, OIDC, and JWT as one group within each file
+vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file ldap,oidc,jwt
 
-# Two independent groups: {ldap,oidc} and {jwt,saml}
-vault-csv-normalizer -f export.csv -dedup-methods ldap,oidc --dedup-methods jwt,saml
-
-# Method-scoped dedup combined with client_id dedup
-vault-csv-normalizer -f jan.csv feb.csv --dedup-methods ldap,oidc -d
+# Two independent per-file groups: {ldap,oidc} and {jwt,saml}
+vault-csv-normalizer -f jan.csv feb.csv --dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml
 
 # Exclude records created before 2024-06-01
 vault-csv-normalizer -f export.csv --since 2024-06-01
@@ -255,50 +214,26 @@ PKI Client Summary
 ## Alias-based deduplication
 
 Vault can record the same human as multiple clients when they authenticate via
-different auth methods (e.g. LDAP in one session and OIDC in another) or as
-tiered accounts (`alice`, `alice-t0`, `alice-t1`). The alias-based dedup flags
-collapse these into a single count.
+different auth methods (e.g. LDAP in one session and OIDC in another).
+`--dedup-methods-per-file` collapses these into a single count within each file.
 
 ### Alias normalization
 
-All alias-based dedup paths apply the same two-step normalization before
-comparing:
+`--dedup-methods-per-file` applies one normalization step before comparing:
 
 1. **Strip domain suffix** — everything from `@` onward is removed.
    `alice@corp.com` → `alice`
-2. **Strip tier suffix** — trailing `-t0`, `-t1`, or `-t2` is removed.
-   `alice-t0` → `alice`
-
-So `alice`, `alice-t0`, `alice-t1`, `alice@corp.com`, and `alice-t0@corp.com`
-all normalize to `alice` and are treated as the same person.
-
-### Choosing a dedup flag
-
-| Flag | What it collapses | What it leaves separate |
-|---|---|---|
-| `--dedup-alias` | All auth methods, grouped so LDAP=OIDC; each other type is its own group | JWT vs LDAP/OIDC |
-| `--dedup-methods ldap,oidc` | Only LDAP and OIDC, as one explicit group | Everything else untouched |
-| `--dedup-methods ldap,oidc,jwt` | LDAP, OIDC, and JWT as one group | Everything else untouched |
-| `--dedup-jwt` | JWT records that match an existing LDAP/OIDC alias | Non-JWT records |
-
-These flags are independent and can be combined. A common production workflow:
 
-```bash
-# Count human users once, across LDAP and OIDC, then remove JWT duplicates,
-# then collapse the same client_id appearing across multiple monthly exports
-vault-csv-normalizer -f jan.csv feb.csv mar.csv \
-  --dedup-methods ldap,oidc \
-  --dedup-jwt \
-  -d
-```
+Tier suffixes (`-t0`, `-t1`, `-t2`) are **not** stripped — `alice-t0` and
+`alice-t1` are treated as distinct identities within a file.
 
 ### Auth methods reference
 
 | `mount_type` / `auth_method` | Typical users | Notes |
 |---|---|---|
 | `ldap` | Humans | Aliases usually bare usernames (`alice`) or tiered (`alice-t0`) |
-| `oidc` | Humans | Aliases usually `username@domain.com` — normalize to same base as LDAP |
-| `jwt` | Humans or services | May share aliases with LDAP/OIDC; use `--dedup-jwt` or `--dedup-methods` |
+| `oidc` | Humans | Aliases usually `username@domain.com` — strip domain to match LDAP |
+| `jwt` | Humans or services | May share aliases with LDAP/OIDC; include in group to collapse |
 | `approle` | Service accounts | Not human; not typically alias-deduped |
 | `kubernetes` | Service accounts | Not human; not typically alias-deduped |
 | `aws` / `gcp` | Service accounts | Not human; not typically alias-deduped |
@@ -329,7 +264,7 @@ The tool expects CSVs exported from the Vault activity export API
 | `client_type`            | No       | Type of client (entity, non-entity, acme, etc.)  |
 | `token_creation_time`    | No       | RFC3339 timestamp of token creation              |
 | `client_first_usage_time`| No       | RFC3339 timestamp of first authenticated call    |
-| `entity_alias_name`      | No       | Human-readable alias for the entity (used by `--dedup-alias` and `--dedup-methods`; domain and tier suffixes are stripped during normalization) |
+| `entity_alias_name`      | No       | Human-readable alias for the entity (used by `--dedup-methods-per-file`; domain suffix is stripped during normalization) |
 
 ### Supported Column Aliases
 
diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index 93d9845..cd56b06 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -39,7 +39,6 @@ func (f fileDateFlag) Set(v string) error {
 
 func main() {
 	var inputFiles multiFlag
-	var dedupMethods multiFlag
 	var dedupMethodsPerFile multiFlag
 	var sortBy string
 	var filterNS string
@@ -47,9 +46,6 @@ func main() {
 	var filterSince string
 	var filterSinceFile = make(fileDateFlag)
 	var countPKI bool
-	var dedup bool
-	var dedupAlias bool
-	var dedupJWT bool
 	var removeAbandonedClients bool
 	var listMethods bool
 	var debugMode bool
@@ -63,13 +59,9 @@ func main() {
 	flag.StringVar(&filterSince, "since", "", "Exclude records with a token_creation_time before this value (e.g. 2024-01-01 or 2024-01-01T00:00:00Z)")
 	flag.Var(&filterSinceFile, "since-file", "Apply a since filter to one file only: filename=date. May be specified multiple times for different files.")
 	flag.BoolVar(&countPKI, "p", false, "Partition and report PKI/cert clients (client_type=acme or mount_accessor prefix auth_cert) separately")
-	flag.BoolVar(&dedup, "d", false, "Deduplicate records by client_id across all input files")
-	flag.BoolVar(&dedupAlias, "dedup-alias", false, "Deduplicate by entity_alias_name (strips domain and -t0/-t1/-t2 tier suffixes; records without an alias are always kept; may be combined with -d)")
-	flag.Var(&dedupMethods, "dedup-methods", "Deduplicate by alias for the specified comma-separated auth methods, treating them as one identity group. Repeatable to define multiple groups (e.g. -dedup-methods ldap,oidc -dedup-methods jwt,saml).")
-	flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Like --dedup-methods but scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.")
-	flag.BoolVar(&dedupJWT, "dedup-jwt", false, "Drop JWT records whose normalized alias matches a non-JWT record in the same file (prevents counting the same person via both LDAP/OIDC and JWT)")
+	flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Deduplicate by alias for the specified comma-separated auth methods, scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.")
 	flag.BoolVar(&removeAbandonedClients, "remove-abandoned-clients", false, "Remove abandoned clients (blank entity_name and entity_alias_name) after deduplication. Includes records with no auth mount and merged/deleted entities.")
-	flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods groups.")
+	flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods-per-file groups.")
 	flag.BoolVar(&debugMode, "debug", false, "Print all records grouped by mount path")
 	flag.BoolVar(&perFile, "per-file", false, "Print a summary for each input file before the combined summary")
 	flag.BoolVar(&showHelp, "help", false, "Show usage information")
@@ -119,22 +111,6 @@ func main() {
 		os.Exit(0)
 	}
 
-	// Parse --dedup-methods values into groups. Each flag value is a
-	// comma-separated list of mount types that form one identity group.
-	var methodGroups [][]string
-	for _, val := range dedupMethods {
-		var group []string
-		for _, m := range strings.Split(val, ",") {
-			m = strings.TrimSpace(strings.ToLower(m))
-			if m != "" {
-				group = append(group, m)
-			}
-		}
-		if len(group) > 0 {
-			methodGroups = append(methodGroups, group)
-		}
-	}
-
 	var methodGroupsPerFile [][]string
 	for _, val := range dedupMethodsPerFile {
 		var group []string
@@ -149,39 +125,7 @@ func main() {
 		}
 	}
 
-	// Snapshot pre-dedup records so debug mode can show alias groups from the
-	// original data regardless of which dedup flags are active.
 	preDedup := normalized
-	if dedupAlias {
-		groups := normalizer.FindAliasDuplicates(preDedup)
-		if len(groups) > 0 {
-			fmt.Fprintf(os.Stdout, "Alias duplicates found (%d group(s))\n", len(groups))
-			fmt.Fprintln(os.Stdout, "=====================================")
-			for _, group := range groups {
-				r0 := group[0]
-				fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
-					normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source))
-				renderer.PrintTable(os.Stdout, group)
-			}
-			fmt.Fprintln(os.Stdout)
-		}
-		normalized = normalizer.DeduplicateByAlias(normalized)
-	}
-	if len(methodGroups) > 0 {
-		groups := normalizer.FindAliasDuplicatesForMethods(preDedup, methodGroups)
-		if len(groups) > 0 {
-			fmt.Fprintf(os.Stdout, "Method-scoped alias duplicates found (%d group(s))\n", len(groups))
-			fmt.Fprintln(os.Stdout, "================================================")
-			for _, group := range groups {
-				r0 := group[0]
-				fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
-					normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source))
-				renderer.PrintTable(os.Stdout, group)
-			}
-			fmt.Fprintln(os.Stdout)
-		}
-		normalized = normalizer.DeduplicateByAliasForMethods(normalized, methodGroups)
-	}
 	if len(methodGroupsPerFile) > 0 {
 		groups := normalizer.FindAliasDuplicatesForMethodsPerFile(preDedup, methodGroupsPerFile)
 		if len(groups) > 0 {
@@ -198,34 +142,6 @@ func main() {
 		normalized = normalizer.DeduplicateByAliasForMethodsPerFile(normalized, methodGroupsPerFile)
 	}
 
-	// Collect -d dedup statistics before running so debug mode can report
-	// exactly which client_ids were (or weren't) collapsed.
-	var clientIDDupsBefore int
-	var clientIDDupsAfter int
-	var clientIDDupMap map[string]int // client_id → count of input records
-	if dedup && debugMode {
-		clientIDDupsBefore = len(normalized)
-		idCount := make(map[string]int, len(normalized))
-		for _, r := range normalized {
-			idCount[r.ClientID]++
-		}
-		clientIDDupMap = make(map[string]int)
-		for id, n := range idCount {
-			if n > 1 {
-				clientIDDupMap[id] = n
-			}
-		}
-	}
-	if dedup {
-		normalized = normalizer.Deduplicate(normalized)
-		if debugMode {
-			clientIDDupsAfter = len(normalized)
-		}
-	}
-	if dedupJWT {
-		normalized = normalizer.DeduplicateJWT(normalized)
-	}
-
 	removedAbandonedCounts := normalizer.AbandonedClientCounts{}
 	if removeAbandonedClients {
 		normalized, removedAbandonedCounts = normalizer.FilterAbandonedClients(normalized)
@@ -261,46 +177,6 @@ func main() {
 	}
 
 	if debugMode {
-		// Show -d dedup results so the user can see which client_ids were (or
-		// weren't) collapsed, and understand why records still appear after dedup.
-		if dedup {
-			collapsed := clientIDDupsBefore - clientIDDupsAfter
-			fmt.Fprintf(os.Stdout, "Debug: -d client_id dedup — before: %d  after: %d  collapsed: %d\n",
-				clientIDDupsBefore, clientIDDupsAfter, collapsed)
-			fmt.Fprintln(os.Stdout, strings.Repeat("-", 70))
-			if len(clientIDDupMap) > 0 {
-				dupIDs := make([]string, 0, len(clientIDDupMap))
-				for id := range clientIDDupMap {
-					dupIDs = append(dupIDs, id)
-				}
-				sort.Strings(dupIDs)
-				for _, id := range dupIDs {
-					fmt.Fprintf(os.Stdout, "  %s  (x%d → kept 1)\n", id, clientIDDupMap[id])
-				}
-			} else {
-				fmt.Fprintln(os.Stdout, "  (no duplicate client_ids found)")
-			}
-			fmt.Fprintln(os.Stdout)
-		}
-
-		// Show alias groups from the original (pre-dedup) data so the user can
-		// see aliasing context regardless of which dedup flags are active.
-		// Skip when -dedup-alias is set because it already printed these above.
-		if !dedupAlias {
-			groups := normalizer.FindAliasDuplicates(preDedup)
-			if len(groups) > 0 {
-				fmt.Fprintf(os.Stdout, "Debug: alias groups in input data (%d group(s))\n", len(groups))
-				fmt.Fprintln(os.Stdout, "===============================================")
-				for _, group := range groups {
-					r0 := group[0]
-					fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
-						normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source))
-					renderer.PrintTable(os.Stdout, group)
-				}
-				fmt.Fprintln(os.Stdout)
-			}
-		}
-
 		// Group final (post-dedup) records by mount path.
 		var mountOrder []string
 		byMount := make(map[string][]normalizer.Record)
@@ -321,7 +197,7 @@ func main() {
 			fmt.Fprintf(os.Stdout, "\nMount: %s (%d record(s))\n", mp, len(group))
 			renderer.PrintTable(os.Stdout, group)
 			// Flag records within this mount that share an entity alias but have
-			// different client_ids — these are candidates for -dedup-alias.
+			// different client_ids — use --dedup-methods-per-file to collapse them.
 			if len(group) > 1 {
 				aliasToIDs := make(map[string][]string)
 				for _, r := range group {
@@ -335,7 +211,7 @@ func main() {
 					if len(ids) < 2 {
 						continue
 					}
-					// Check that not all client_ids are the same (already handled by -d).
+					// Check that not all client_ids are the same.
 					allSame := true
 					for _, id := range ids[1:] {
 						if id != ids[0] {
@@ -344,7 +220,7 @@ func main() {
 						}
 					}
 					if !allSame {
-						fmt.Fprintf(os.Stdout, "  !! alias %q has %d records with different client_ids — use -dedup-alias to collapse\n", alias, len(ids))
+						fmt.Fprintf(os.Stdout, "  !! alias %q has %d records with different client_ids — use --dedup-methods-per-file to collapse\n", alias, len(ids))
 					}
 				}
 			}
@@ -417,8 +293,8 @@ func printMethodList(records []normalizer.Record, files []string) {
 		fmt.Fprintf(os.Stdout, "  %-20s  %8d  %10d\n", mt, s.total, s.withAlias)
 	}
 	fmt.Fprintln(os.Stdout)
-	fmt.Fprintln(os.Stdout, "Tip: use --dedup-methods to group methods into human/machine identity sets.")
-	fmt.Fprintln(os.Stdout, "  Example: --dedup-methods ldap,oidc,jwt --dedup-methods approle,kubernetes")
+	fmt.Fprintln(os.Stdout, "Tip: use --dedup-methods-per-file to group methods into human/machine identity sets.")
+	fmt.Fprintln(os.Stdout, "  Example: --dedup-methods-per-file ldap,oidc,jwt --dedup-methods-per-file approle,kubernetes")
 }
 
 func printUsage() {
@@ -476,65 +352,31 @@ CSV FORMAT (Vault activity export):
 
   Optional column:
     entity_alias_name  (also accepted as: alias_name, entity_alias)
-      When present, --dedup-alias collapses records that share the same
-      normalized alias within the same identity group across all input files.
-      LDAP and OIDC are treated as one group. Normalization strips the domain
-      suffix (at '@') and any trailing tier suffix (-t0, -t1, -t2).
-      "sbishop" (LDAP, jan.csv), "sbishop-t0" (LDAP, feb.csv), and
-      "sbishop@corp.com" (OIDC) → one client. JWT is a separate group;
-      use --dedup-jwt to additionally collapse JWT against LDAP/OIDC.
-
-      --dedup-jwt uses the same normalization to match JWT records against
-      non-JWT records in the same file. A JWT record is dropped if a non-JWT
-      record (e.g. LDAP or OIDC) shares the same normalized alias, preventing
-      the same person from being counted twice when they authenticate via both
-      methods. Can be combined with --dedup-alias and/or -d.
-
-  --dedup-methods <method1,method2,...>
-      Apply alias deduplication (same normalization as --dedup-alias) but only
-      for records whose auth method appears in the specified comma-separated
-      group. Methods in the same group are treated as one identity — a person
-      authenticating via any of them is counted once. Records whose auth method
-      is not in any group pass through unchanged.
-
-      The flag is repeatable; each use defines one independent group:
-
-        --dedup-methods ldap,oidc
-            Deduplicate LDAP and OIDC as one identity group. "alice" (LDAP),
-            "alice@corp.com" (OIDC), and "alice-t0" (LDAP) all normalize to
-            "alice" and are counted once.
-
-        --dedup-methods ldap,oidc,jwt
-            Treat LDAP, OIDC, and JWT together as one group.
-
-        --dedup-methods ldap,oidc --dedup-methods jwt,saml
-            Two independent groups: {ldap,oidc} and {jwt,saml}. A person
-            appearing in both LDAP and OIDC is counted once; a person
-            appearing in both JWT and SAML is counted once; but an LDAP
-            record and a JWT record for the same person are not collapsed
-            (unless both groups are merged into one).
-
-      Can be combined with --dedup-alias, --dedup-jwt, and/or -d.
+      When present, --dedup-methods-per-file collapses records that share
+      the same normalized alias within each source file. Normalization strips
+      the domain suffix (at '@'). Tier suffixes (-t0, -t1, -t2) are NOT
+      stripped — "sbishop-t0" and "sbishop-t1" are treated as distinct
+      identities within a file.
 
   --dedup-methods-per-file <method1,method2,...>
-      Like --dedup-methods but deduplication is scoped to each input file
-      independently. Records in different files with the same normalized alias
-      are NOT collapsed against each other — only within-file duplicates are
-      removed. Useful when files represent different billing periods and you
-      want to count a returning user once per file rather than once globally.
+      Deduplicate by alias for records whose auth method appears in the
+      specified comma-separated group, scoped to each input file
+      independently. Records in different files with the same normalized
+      alias are NOT collapsed — only within-file duplicates are removed.
+      Useful when files represent different billing periods and you want to
+      count a returning user once per file rather than once globally.
 
-      Uses the same alias normalization and method-grouping syntax as
-      --dedup-methods (repeatable, comma-separated groups).
+      Normalization strips domain suffixes (at '@') only — tier suffixes
+      like -t0/-t1 are kept, so "alice-t0" and "alice-t1" are distinct.
+
+      The flag is repeatable; each use defines one independent group:
 
         --dedup-methods-per-file ldap,oidc
             Within each file, collapse LDAP and OIDC records that share the
-            same alias (exact match; tier suffixes like -t0/-t1 are distinct).
-            A user in jan.csv (LDAP) and feb.csv (OIDC) is NOT collapsed —
-            they appear once per file.
+            same alias. A user in jan.csv (LDAP) and feb.csv (OIDC) is NOT
+            collapsed — they appear once per file.
 
         --dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml
-            Two independent per-file groups. Same alias collapsing rules as
-            --dedup-methods but strictly within each source file.
-
-      Can be combined with --dedup-methods, --dedup-alias, --dedup-jwt, and/or -d.`)
+            Two independent per-file groups. Records in different groups are
+            never collapsed against each other.`)
 }
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index df9a2d6..b6cb8d0 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -160,27 +160,6 @@ func ParseTime(raw string) time.Time {
 	return time.Time{} // unparseable → zero value
 }
 
-// Deduplicate removes records with duplicate ClientIDs. When duplicates exist,
-// the record with a non-empty MountPath is preferred over one with an empty
-// MountPath; otherwise the first occurrence is kept.
-func Deduplicate(records []Record) []Record {
-	index := make(map[string]int, len(records)) // client_id → position in out
-	out := make([]Record, 0, len(records))
-	for _, r := range records {
-		i, seen := index[r.ClientID]
-		if !seen {
-			index[r.ClientID] = len(out)
-			out = append(out, r)
-			continue
-		}
-		// Upgrade an empty-mount record if we now have a real mount path.
-		if out[i].MountPath == "" && r.MountPath != "" {
-			out[i] = r
-		}
-	}
-	return out
-}
-
 // BaseAlias returns the portion of an entity alias name before the first '@'
 // character. If no '@' is present the full name is returned.
 // Example: "alice@corp.com" → "alice", "sbishop@hashicorp.com" → "sbishop",
@@ -206,96 +185,12 @@ func StripTierSuffix(name string) string {
 }
 
 // aliasKey is the deduplication key for alias-based dedup: one record is
-// allowed per (normalized alias, mount type) pair across all input files.
-// Including the mount type prevents --dedup-alias from collapsing records
-// across different auth methods (e.g. LDAP vs JWT); use --dedup-jwt for that.
+// allowed per (normalized alias, mount type) pair.
 type aliasKey struct {
 	base      string
 	mountType string
 }
 
-// dedupMountGroup maps mount types that represent the same identity provider
-// to a single canonical value. OIDC and LDAP are treated as one group because
-// the same person typically has the same username in both systems.
-func dedupMountGroup(mt string) string {
-	if mt == "oidc" {
-		return "ldap"
-	}
-	return mt
-}
-
-// aliasKeyFor computes the dedup key for a record. It strips the domain suffix
-// (at '@') and any trailing tier suffix ("-t0"/"-t1"/"-t2"), and scopes the
-// key to the mount group so that only records of the same identity type
-// collapse. OIDC and LDAP share a group; JWT remains separate (use
-// --dedup-jwt for JWT vs LDAP/OIDC dedup).
-func aliasKeyFor(r Record) aliasKey {
-	mt := r.MountType
-	if mt == "" {
-		mt = r.AuthMethod
-	}
-	return aliasKey{
-		base:      StripTierSuffix(BaseAlias(r.EntityAliasName)),
-		mountType: dedupMountGroup(mt),
-	}
-}
-
-// FindAliasDuplicates groups records by (BaseAlias, source file) and returns
-// every group that contains more than one record. Records with a blank
-// EntityAliasName or that are PKI clients are ignored. Groups are returned in
-// the order the first member of each group appeared in records.
-func FindAliasDuplicates(records []Record) [][]Record {
-	type entry struct {
-		key     aliasKey
-		members []Record
-	}
-	index := make(map[aliasKey]int)
-	var entries []entry
-
-	for _, r := range records {
-		if r.EntityAliasName == "" || IsPKIClient(r) {
-			continue
-		}
-		k := aliasKeyFor(r)
-		if idx, ok := index[k]; ok {
-			entries[idx].members = append(entries[idx].members, r)
-		} else {
-			index[k] = len(entries)
-			entries = append(entries, entry{key: k, members: []Record{r}})
-		}
-	}
-
-	var out [][]Record
-	for _, e := range entries {
-		if len(e.members) > 1 {
-			out = append(out, e.members)
-		}
-	}
-	return out
-}
-
-// DeduplicateByAlias keeps at most one record per (BaseAlias, source file)
-// combination. The same user authenticating via multiple mount accessors in
-// the same file is collapsed to one record. Records with a blank
-// EntityAliasName or that are PKI clients are always kept.
-func DeduplicateByAlias(records []Record) []Record {
-	seen := make(map[aliasKey]struct{}, len(records))
-	out := make([]Record, 0, len(records))
-	for _, r := range records {
-		if r.EntityAliasName == "" || IsPKIClient(r) {
-			out = append(out, r)
-			continue
-		}
-		k := aliasKeyFor(r)
-		if _, dup := seen[k]; dup {
-			continue
-		}
-		seen[k] = struct{}{}
-		out = append(out, r)
-	}
-	return out
-}
-
 // buildMethodGroupMap converts a list of groups (each a slice of mount-type
 // strings) into a map from every member to the group's canonical value (the
 // first element of the group). Methods not present in any group are absent
@@ -314,96 +209,6 @@ func buildMethodGroupMap(groups [][]string) map[string]string {
 	return m
 }
 
-// aliasKeyForMethods computes the dedup key for a record using a caller-supplied
-// group map (from buildMethodGroupMap). If the record's mount type is not in the
-// map the second return value is false, meaning the record should not participate
-// in method-scoped dedup.
-func aliasKeyForMethods(r Record, groupMap map[string]string) (aliasKey, bool) {
-	mt := r.MountType
-	if mt == "" {
-		mt = r.AuthMethod
-	}
-	canonical, ok := groupMap[mt]
-	if !ok {
-		return aliasKey{}, false
-	}
-	return aliasKey{
-		base:      StripTierSuffix(BaseAlias(r.EntityAliasName)),
-		mountType: canonical,
-	}, true
-}
-
-// FindAliasDuplicatesForMethods is the same as FindAliasDuplicates but only
-// considers records whose auth method (MountType or AuthMethod) appears in one
-// of the provided groups. Each group is a slice of mount-type strings that
-// should be treated as the same identity (e.g. ["ldap","oidc"]). Records whose
-// method is not in any group are not reported. Groups are independent: records
-// in different groups are never compared against each other.
-func FindAliasDuplicatesForMethods(records []Record, groups [][]string) [][]Record {
-	groupMap := buildMethodGroupMap(groups)
-
-	type entry struct {
-		key     aliasKey
-		members []Record
-	}
-	index := make(map[aliasKey]int)
-	var entries []entry
-
-	for _, r := range records {
-		if r.EntityAliasName == "" || IsPKIClient(r) {
-			continue
-		}
-		k, ok := aliasKeyForMethods(r, groupMap)
-		if !ok {
-			continue
-		}
-		if idx, exists := index[k]; exists {
-			entries[idx].members = append(entries[idx].members, r)
-		} else {
-			index[k] = len(entries)
-			entries = append(entries, entry{key: k, members: []Record{r}})
-		}
-	}
-
-	var out [][]Record
-	for _, e := range entries {
-		if len(e.members) > 1 {
-			out = append(out, e.members)
-		}
-	}
-	return out
-}
-
-// DeduplicateByAliasForMethods applies the same alias dedup logic as
-// DeduplicateByAlias but only for records whose auth method appears in one of
-// the provided groups. Each group is a slice of mount-type strings treated as
-// one identity (e.g. ["ldap","oidc"]). Records whose method is not in any group
-// pass through unchanged. Records with a blank EntityAliasName or that are PKI
-// clients are always kept.
-func DeduplicateByAliasForMethods(records []Record, groups [][]string) []Record {
-	groupMap := buildMethodGroupMap(groups)
-	seen := make(map[aliasKey]struct{}, len(records))
-	out := make([]Record, 0, len(records))
-	for _, r := range records {
-		if r.EntityAliasName == "" || IsPKIClient(r) {
-			out = append(out, r)
-			continue
-		}
-		k, ok := aliasKeyForMethods(r, groupMap)
-		if !ok {
-			// Method not in any group — pass through untouched.
-			out = append(out, r)
-			continue
-		}
-		if _, dup := seen[k]; dup {
-			continue
-		}
-		seen[k] = struct{}{}
-		out = append(out, r)
-	}
-	return out
-}
-
 // aliasKeyInFile is the deduplication key for per-file alias dedup. It includes
 // the source file so records from different files are never collapsed together.
 type aliasKeyInFile struct {
@@ -450,11 +255,10 @@ func aliasKeyInFileFor(r Record, groupMap map[string]string) (aliasKeyInFile, bo
 	}, true
 }
 
-// FindAliasDuplicatesForMethodsPerFile is like FindAliasDuplicatesForMethods
-// but only collapses records within the same source file. Records in different
-// files with the same alias are not reported as duplicates. Matching uses only
-// the portion of the alias left of '@'; tier suffixes (-t0/-t1/-t2) are not
-// stripped and must match exactly.
+// FindAliasDuplicatesForMethodsPerFile reports records whose auth method is in
+// one of the given groups and that share a normalized alias within one source
+// file; the same alias in another file is not a duplicate. Matching uses only
+// the part of the alias left of '@'; tier suffixes (-t0/-t1/-t2) are kept as-is.
 func FindAliasDuplicatesForMethodsPerFile(records []Record, groups [][]string) [][]Record {
 	groupMap := buildMethodGroupMap(groups)
 
@@ -490,9 +294,9 @@ func FindAliasDuplicatesForMethodsPerFile(records []Record, groups [][]string) [
 	return out
 }
 
-// DeduplicateByAliasForMethodsPerFile applies alias dedup like
-// DeduplicateByAliasForMethods but scoped to each source file independently.
-// Records in different files are never collapsed; only records from the same
+// DeduplicateByAliasForMethodsPerFile deduplicates records by alias, scoped
+// to each source file independently. Records in different files are never
+// collapsed against one another; only records that come from the same
 // file with the same normalized alias and method group are deduplicated.
 // Matching uses only the portion of the alias left of '@'; tier suffixes
 // (-t0/-t1/-t2) are not stripped and must match exactly.
@@ -520,42 +324,6 @@ func DeduplicateByAliasForMethodsPerFile(records []Record, groups [][]string) []
 	return out
 }
 
-// isJWT reports whether r was authenticated via JWT.
-func isJWT(r Record) bool {
-	return r.MountType == "jwt" || r.AuthMethod == "jwt"
-}
-
-// DeduplicateJWT drops JWT records whose normalized alias (StripTierSuffix +
-// BaseAlias) matches a non-JWT record's normalized alias in the same source
-// file. This prevents the same person from being counted once for their LDAP
-// or OIDC identity and again for their JWT identity. Records without an alias
-// are always kept.
-func DeduplicateJWT(records []Record) []Record {
-	// Build global set of normalized aliases from all non-JWT records.
-	nonJWTAliases := make(map[string]struct{})
-	for _, r := range records {
-		if isJWT(r) || r.EntityAliasName == "" {
-			continue
-		}
-		norm := StripTierSuffix(BaseAlias(r.EntityAliasName))
-		if norm != "" {
-			nonJWTAliases[norm] = struct{}{}
-		}
-	}
-
-	out := make([]Record, 0, len(records))
-	for _, r := range records {
-		if isJWT(r) && r.EntityAliasName != "" {
-			norm := StripTierSuffix(BaseAlias(r.EntityAliasName))
-			if _, match := nonJWTAliases[norm]; match {
-				continue
-			}
-		}
-		out = append(out, r)
-	}
-	return out
-}
-
 // IsPKIClient reports whether r is a PKI/cert client. It matches on either:
 //   - client_type == "acme" (ACME protocol clients from the PKI secrets engine), or
 //   - mount_accessor starting with "auth_cert" (cert auth method clients)
diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go
index e25fd5a..6ebefa5 100644
--- a/internal/normalizer/normalizer_test.go
+++ b/internal/normalizer/normalizer_test.go
@@ -188,38 +188,6 @@ func TestFilterAbandonedClients(t *testing.T) {
 	}
 }
 
-func TestDeduplicate_PrefersNonEmptyMount(t *testing.T) {
-	records := []Record{
-		{ClientID: "abc", MountPath: ""},
-		{ClientID: "abc", MountPath: "auth/ldap/"},
-		{ClientID: "xyz", MountPath: "auth/approle/"},
-		{ClientID: "xyz", MountPath: ""},
-	}
-	out := Deduplicate(records)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records after dedup, got %d", len(out))
-	}
-	for _, r := range out {
-		if r.MountPath == "" {
-			t.Errorf("client %q kept empty-mount record when a non-empty mount was available", r.ClientID)
-		}
-	}
-}
-
-func TestDeduplicate_KeepsFirstWhenBothEmpty(t *testing.T) {
-	records := []Record{
-		{ClientID: "abc", MountPath: "", AuthMethod: "first"},
-		{ClientID: "abc", MountPath: "", AuthMethod: "second"},
-	}
-	out := Deduplicate(records)
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record, got %d", len(out))
-	}
-	if out[0].AuthMethod != "first" {
-		t.Errorf("expected first occurrence to be kept, got AuthMethod=%q", out[0].AuthMethod)
-	}
-}
-
 func TestFilterSince(t *testing.T) {
 	records := []Record{
 		{ClientID: "old", TokenCreationTime: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)},
@@ -308,7 +276,7 @@ func TestStripTierSuffix(t *testing.T) {
 }
 
 func TestStripTierSuffix_AfterBaseAlias(t *testing.T) {
-	// The combination used by aliasKeyFor: strip domain then tier.
+	// Strip domain then tier suffix.
 	cases := []struct{ in, want string }{
 		{"alice-t0@corp.com", "alice"},
 		{"alice-t1@corp.com", "alice"},
@@ -324,122 +292,6 @@ func TestStripTierSuffix_AfterBaseAlias(t *testing.T) {
 	}
 }
 
-func TestDeduplicateByAlias_CollapsesSameBaseAcrossAccessors(t *testing.T) {
-	// "sbishop", "sbishop@hashicorp.com", "sbishop-t0", "sbishop-t1", and
-	// "sbishop" in a second file all normalize to "sbishop" → only the first
-	// occurrence across all files is kept.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "sbishop@hashicorp.com", MountAccessor: "auth_jwt_def456", Source: "jan.csv"}, // dup: normalizes to "sbishop"
-		{ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},           // dup: tier stripped → "sbishop"
-		{ClientID: "4", EntityAliasName: "sbishop-t1", MountAccessor: "auth_oidc_xyz789", Source: "jan.csv"},           // dup: tier stripped → "sbishop"
-		{ClientID: "5", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"},              // dup: same normalized alias across files
-		{ClientID: "6", EntityAliasName: ""}, // kept: blank always kept
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "6"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	for _, id := range []string{"2", "3", "4", "5"} {
-		if kept[id] {
-			t.Errorf("expected ClientID=%s to be dropped", id)
-		}
-	}
-}
-
-func TestDeduplicateByAlias_KeepsAllBlanks(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: ""},
-		{ClientID: "2", EntityAliasName: ""},
-		{ClientID: "3", EntityAliasName: "alice@corp.com", Source: "jan.csv"},
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records (2 blanks + 1 aliased), got %d", len(out))
-	}
-}
-
-func TestFindAliasDuplicates_SameBaseAcrossAccessors(t *testing.T) {
-	// "sbishop", "sbishop@hashicorp.com", "sbishop-t0", and "sbishop" in a
-	// second file all normalize to "sbishop" → one group with 4 members.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "sbishop@hashicorp.com", MountAccessor: "auth_jwt_def456", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "sbishop-t0", MountAccessor: "auth_ldap_abc123", Source: "jan.csv"},
-		{ClientID: "4", EntityAliasName: "sbishop", MountAccessor: "auth_ldap_abc123", Source: "feb.csv"}, // cross-file dup
-		{ClientID: "5", EntityAliasName: ""}, // ignored
-	}
-	groups := FindAliasDuplicates(records)
-	if len(groups) != 1 {
-		t.Fatalf("expected 1 duplicate group, got %d", len(groups))
-	}
-	if len(groups[0]) != 4 {
-		t.Errorf("expected 4 members in group, got %d", len(groups[0]))
-	}
-	for _, r := range groups[0] {
-		if StripTierSuffix(BaseAlias(r.EntityAliasName)) != "sbishop" {
-			t.Errorf("unexpected record in group: %+v", r)
-		}
-	}
-}
-
-func TestFindAliasDuplicates_NoDuplicates(t *testing.T) {
-	// All different normalized aliases — no duplicates regardless of file.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "bob", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "carol", Source: "feb.csv"},
-		{ClientID: "4", EntityAliasName: ""},
-	}
-	groups := FindAliasDuplicates(records)
-	if len(groups) != 0 {
-		t.Errorf("expected no duplicate groups, got %d", len(groups))
-	}
-}
-
-func TestDeduplicateByAlias_IgnoresPKIClients(t *testing.T) {
-	// PKI clients are always kept regardless of alias duplication.
-	// Non-PKI clients with the same base alias in the same file are deduplicated.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"},             // PKI, kept
-		{ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"},             // PKI, kept (not deduped)
-		{ClientID: "3", EntityAliasName: "abc-789", MountAccessor: "auth_cert_xyz", Source: "jan.csv"}, // cert auth — PKI, kept
-		{ClientID: "4", EntityAliasName: "alice@corp", Source: "jan.csv"},                              // non-PKI, first: kept
-		{ClientID: "5", EntityAliasName: "alice@example.com", Source: "jan.csv"},                       // non-PKI dup: base "alice" already seen, dropped
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 4 {
-		t.Fatalf("expected 4 records (3 PKI/cert + 1 non-PKI), got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "2", "3", "4"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	if kept["5"] {
-		t.Errorf("expected ClientID=5 (non-PKI dup) to be dropped")
-	}
-}
-
-func TestFindAliasDuplicates_IgnoresPKIClients(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "abc-456", ClientType: "acme", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "abc-789", MountAccessor: "auth_cert_xyz", Source: "jan.csv"},
-	}
-	groups := FindAliasDuplicates(records)
-	if len(groups) != 0 {
-		t.Errorf("expected no duplicate groups (all PKI/cert), got %d", len(groups))
-	}
-}
-
 // helpers for alias dedup tests
 func clientIDs(records []Record) []string {
 	ids := make([]string, len(records))
@@ -714,421 +566,6 @@ func TestFilterSincePerSource_EmptyMap(t *testing.T) {
 	}
 }
 
-// ── JWT deduplication ─────────────────────────────────────────────────────────
-
-func TestDeduplicateJWT_DropsJWTMatchingNonJWT(t *testing.T) {
-	// alice authenticates via LDAP (kept) and JWT (dropped — same normalized alias).
-	// bob has only a JWT record (kept — no non-JWT match).
-	// carol has a JWT record with no alias (always kept).
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // dropped: normalizes to "alice", matches LDAP
-		{ClientID: "3", EntityAliasName: "bob@corp.com", MountType: "jwt", Source: "jan.csv"},   // kept: no non-JWT match for "bob"
-		{ClientID: "4", EntityAliasName: "", MountType: "jwt", Source: "jan.csv"},               // kept: blank alias always kept
-	}
-	out := DeduplicateJWT(records)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "3", "4"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	if kept["2"] {
-		t.Error("expected ClientID=2 (JWT dup of LDAP alice) to be dropped")
-	}
-}
-
-func TestDeduplicateJWT_TierNormalizationApplied(t *testing.T) {
-	// LDAP alias is "alice-t0" (normalizes to "alice").
-	// JWT alias is "alice@corp.com" (normalizes to "alice").
-	// They match → JWT dropped.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},
-	}
-	out := DeduplicateJWT(records)
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out))
-	}
-	if out[0].ClientID != "1" {
-		t.Errorf("expected LDAP record to be kept, got ClientID=%s", out[0].ClientID)
-	}
-}
-
-func TestDeduplicateJWT_MatchesAcrossFiles(t *testing.T) {
-	// JWT record in feb.csv matches an LDAP alias in jan.csv — cross-file match
-	// is intentional, JWT record is dropped.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "feb.csv"},
-	}
-	out := DeduplicateJWT(records)
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record (cross-file JWT match dropped), got %d", len(out))
-	}
-	if out[0].ClientID != "1" {
-		t.Errorf("expected LDAP record kept, got ClientID=%s", out[0].ClientID)
-	}
-}
-
-func TestDeduplicateJWT_AuthMethodFallback(t *testing.T) {
-	// JWT identified via auth_method rather than mount_type.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", AuthMethod: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", AuthMethod: "jwt", Source: "jan.csv"},
-	}
-	out := DeduplicateJWT(records)
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out))
-	}
-	if out[0].ClientID != "1" {
-		t.Errorf("expected LDAP record kept, got ClientID=%s", out[0].ClientID)
-	}
-}
-
-func TestDeduplicateJWT_NonJWTRecordsUnaffected(t *testing.T) {
-	// No JWT records — nothing should be dropped.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "bob", MountType: "oidc", Source: "jan.csv"},
-	}
-	out := DeduplicateJWT(records)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records, got %d", len(out))
-	}
-}
-
-// ── combined alias + client_id deduplication ─────────────────────────────────
-
-func TestDeduplicateByAlias_ThenDeduplicate_CollapsesBothDimensions(t *testing.T) {
-	// --dedup-alias runs first (within-file tier/domain collapse), then -d
-	// (cross-file client_id collapse). Together they handle the case where the
-	// same person appears as different alias variants in the same file AND as the
-	// same client_id across multiple files.
-	//
-	// jan.csv: alice (id:1) and alice-t0 (id:2) → alias dedup keeps id:1, drops id:2
-	// feb.csv: alice (id:1) → same client_id as jan.csv survivor → -d drops it
-	// jan.csv: bob (id:3) → distinct alias and id → kept throughout
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice-t0", Source: "jan.csv"}, // dropped by alias dedup (tier → "alice")
-		{ClientID: "1", EntityAliasName: "alice", Source: "feb.csv"},    // dropped by -d (same id as jan survivor)
-		{ClientID: "3", EntityAliasName: "bob", Source: "jan.csv"},
-	}
-
-	afterAlias := DeduplicateByAlias(records)
-	afterBoth := Deduplicate(afterAlias)
-
-	if len(afterBoth) != 2 {
-		t.Fatalf("expected 2 records, got %d: %v", len(afterBoth), clientIDs(afterBoth))
-	}
-	kept := clientIDSet(afterBoth)
-	if !kept["1"] {
-		t.Error("expected id:1 to be kept")
-	}
-	if !kept["3"] {
-		t.Error("expected id:3 to be kept")
-	}
-	if kept["2"] {
-		t.Error("expected id:2 to be dropped by alias dedup")
-	}
-}
-
-func TestDeduplicateByAlias_CollapsesAcrossFiles(t *testing.T) {
-	// alice-t0 in jan.csv and alice-t1 in feb.csv both normalize to "alice" →
-	// alias dedup keeps only the first occurrence regardless of file.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice-t0", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice-t1", Source: "feb.csv"},
-	}
-
-	out := DeduplicateByAlias(records)
-
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record (cross-file tier collapse), got %d", len(out))
-	}
-	if out[0].ClientID != "1" {
-		t.Errorf("expected first occurrence (id:1) to be kept, got id:%s", out[0].ClientID)
-	}
-}
-
-func TestDeduplicateByAlias_CollapseOIDCWithLDAP(t *testing.T) {
-	// LDAP and OIDC share the same identity group, so the same normalized alias
-	// across both auth methods is treated as one client.
-	// JWT remains a separate group and is not collapsed here.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dup: ldap/oidc group, normalizes to "alice"
-		{ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"},       // dup: ldap/oidc group, tier stripped → "alice"
-		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},  // kept: jwt is a separate group
-		{ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"},            // kept: different alias
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "4", "5"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	for _, id := range []string{"2", "3"} {
-		if kept[id] {
-			t.Errorf("expected ClientID=%s to be dropped (same ldap/oidc group)", id)
-		}
-	}
-}
-
-func TestDeduplicateByAlias_ScopedToMountType(t *testing.T) {
-	// alice on LDAP and alice@corp.com on JWT share a normalized alias but have
-	// different mount types → --dedup-alias does NOT collapse them. Use
-	// --dedup-jwt to additionally collapse cross-auth-method duplicates.
-	// alice-t0 and alice on LDAP share both the normalized alias AND mount type
-	// → they ARE collapsed.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice-t0", MountType: "ldap", Source: "jan.csv"},      // dup: same type + base
-		{ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"}, // kept: different mount type
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records (alice/ldap + alice/jwt), got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	if !kept["1"] {
-		t.Error("expected id:1 (alice ldap) to be kept")
-	}
-	if !kept["3"] {
-		t.Error("expected id:3 (alice jwt) to be kept — different mount type, requires --dedup-jwt")
-	}
-	if kept["2"] {
-		t.Error("expected id:2 (alice-t0 ldap) to be dropped — same mount type and normalized alias")
-	}
-}
-
-// Regression: tiered accounts across files must be collapsed.
-// Before the fix, aliasKey included the source filename, so alice-t0 in
-// jan.csv and alice in feb.csv hashed to different keys and were never
-// compared — each was counted as a separate client.
-func TestDeduplicateByAlias_TieredAccountsAcrossFiles(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice-t0", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice", Source: "feb.csv"},    // same person, different tier label
-		{ClientID: "3", EntityAliasName: "alice-t1", Source: "mar.csv"}, // same person, third file
-		{ClientID: "4", EntityAliasName: "bob", Source: "jan.csv"},      // different person, kept
-	}
-	out := DeduplicateByAlias(records)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records (alice collapsed to 1, bob kept), got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	if !kept["1"] {
-		t.Error("expected first alice occurrence (id:1) to be kept")
-	}
-	if !kept["4"] {
-		t.Error("expected bob (id:4) to be kept")
-	}
-	for _, id := range []string{"2", "3"} {
-		if kept[id] {
-			t.Errorf("expected ClientID=%s (tier variant of alice) to be dropped", id)
-		}
-	}
-}
-
-// ── method-scoped alias deduplication ────────────────────────────────────────
-
-func TestDeduplicateByAliasForMethods_LDAPAndOIDCGroup(t *testing.T) {
-	// Same as -dedup-alias LDAP/OIDC behavior, but specified explicitly.
-	// alice via LDAP is kept; alice@corp.com via OIDC is dropped (same group).
-	// alice via JWT is kept (not in the group).
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped
-		{ClientID: "3", EntityAliasName: "alice-t0", MountType: "ldap", Source: "feb.csv"},       // dropped: tier stripped
-		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},  // kept: jwt not in group
-		{ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"},            // kept: different alias
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "4", "5"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	for _, id := range []string{"2", "3"} {
-		if kept[id] {
-			t.Errorf("expected ClientID=%s to be dropped", id)
-		}
-	}
-}
-
-func TestDeduplicateByAliasForMethods_MethodsNotInGroupPassThrough(t *testing.T) {
-	// approle records are not in any group and must pass through untouched,
-	// even if two share the same alias.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "svc-account", MountType: "approle", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "svc-account", MountType: "approle", Source: "jan.csv"}, // NOT deduped
-		{ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records (2 approle + 1 ldap), got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "2", "3"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	if kept["4"] {
-		t.Error("expected ClientID=4 (oidc dup) to be dropped")
-	}
-}
-
-func TestDeduplicateByAliasForMethods_MultipleIndependentGroups(t *testing.T) {
-	// Group 1: {ldap, oidc}; Group 2: {jwt, saml}
-	// alice/ldap and alice/oidc collapse → 1 kept
-	// alice/jwt and alice/saml collapse → 1 kept
-	// The two groups don't interact with each other.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped (group 1)
-		{ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},  // kept (group 2 first)
-		{ClientID: "4", EntityAliasName: "alice", MountType: "saml", Source: "jan.csv"},          // dropped (group 2)
-		{ClientID: "5", EntityAliasName: "bob", MountType: "ldap", Source: "jan.csv"},            // kept: different alias
-	}
-	groups := [][]string{{"ldap", "oidc"}, {"jwt", "saml"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "3", "5"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	for _, id := range []string{"2", "4"} {
-		if kept[id] {
-			t.Errorf("expected ClientID=%s to be dropped", id)
-		}
-	}
-}
-
-func TestDeduplicateByAliasForMethods_ThreeMethodsOneGroup(t *testing.T) {
-	// ldap, oidc, jwt all in one group — alice across all three collapses to 1.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped
-		{ClientID: "3", EntityAliasName: "alice@corp.com", MountType: "jwt", Source: "jan.csv"},  // dropped
-		{ClientID: "4", EntityAliasName: "bob", MountType: "jwt", Source: "jan.csv"},             // kept: different alias
-	}
-	groups := [][]string{{"ldap", "oidc", "jwt"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records, got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	if !kept["1"] {
-		t.Error("expected id:1 (alice ldap, first occurrence) to be kept")
-	}
-	if !kept["4"] {
-		t.Error("expected id:4 (bob) to be kept")
-	}
-}
-
-func TestDeduplicateByAliasForMethods_BlankAliasAlwaysKept(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "", MountType: "oidc", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records (2 blank + 1 aliased), got %d", len(out))
-	}
-}
-
-func TestDeduplicateByAliasForMethods_PKIClientsAlwaysKept(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "abc-123", ClientType: "acme", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "abc-123", ClientType: "acme", MountType: "oidc", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "4", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"}, // dropped
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 3 {
-		t.Fatalf("expected 3 records (2 PKI + 1 non-PKI), got %d: %v", len(out), clientIDs(out))
-	}
-	kept := clientIDSet(out)
-	for _, id := range []string{"1", "2", "3"} {
-		if !kept[id] {
-			t.Errorf("expected ClientID=%s to be kept", id)
-		}
-	}
-	if kept["4"] {
-		t.Error("expected ClientID=4 to be dropped")
-	}
-}
-
-func TestDeduplicateByAliasForMethods_AuthMethodFallback(t *testing.T) {
-	// MountType is blank; dedup should fall back to AuthMethod.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", AuthMethod: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", AuthMethod: "oidc", Source: "jan.csv"}, // dropped
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	out := DeduplicateByAliasForMethods(records, groups)
-	if len(out) != 1 {
-		t.Fatalf("expected 1 record, got %d: %v", len(out), clientIDs(out))
-	}
-	if out[0].ClientID != "1" {
-		t.Errorf("expected id:1 to be kept, got %s", out[0].ClientID)
-	}
-}
-
-func TestFindAliasDuplicatesForMethods_ReportsGroupsOnly(t *testing.T) {
-	// Only ldap and oidc records should be reported as duplicates.
-	// approle records with the same alias are not in the group and not reported.
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "alice@corp.com", MountType: "oidc", Source: "jan.csv"},
-		{ClientID: "3", EntityAliasName: "alice", MountType: "approle", Source: "jan.csv"}, // not in group
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	dups := FindAliasDuplicatesForMethods(records, groups)
-	if len(dups) != 1 {
-		t.Fatalf("expected 1 duplicate group, got %d", len(dups))
-	}
-	if len(dups[0]) != 2 {
-		t.Errorf("expected 2 members in group (ldap + oidc), got %d", len(dups[0]))
-	}
-}
-
-func TestFindAliasDuplicatesForMethods_NoDuplicates(t *testing.T) {
-	records := []Record{
-		{ClientID: "1", EntityAliasName: "alice", MountType: "ldap", Source: "jan.csv"},
-		{ClientID: "2", EntityAliasName: "bob", MountType: "oidc", Source: "jan.csv"},
-	}
-	groups := [][]string{{"ldap", "oidc"}}
-	dups := FindAliasDuplicatesForMethods(records, groups)
-	if len(dups) != 0 {
-		t.Errorf("expected no duplicate groups, got %d", len(dups))
-	}
-}
-
 // ── input mutation safety ─────────────────────────────────────────────────────
 // These tests guard against the records[:0] pattern, which reuses the backing
 // array and silently corrupts the caller's slice. Each filter must not modify

From a0e30213319b49bdb0df0eb60cd800506a411e85 Mon Sep 17 00:00:00 2001
From: Schuyler Bishop <schuyler@ibm.com>
Date: Wed, 27 May 2026 08:44:04 -0500
Subject: [PATCH 6/8] Removed links to the git repo

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 606ce87..63dc853 100644
--- a/README.md
+++ b/README.md
@@ -28,8 +28,6 @@ versions), and displays a summary of client counts by mount path and type.
 ## Installation
 
 ```bash
-git clone https://github.com/your-org/vault-csv-normalizer
-cd vault-csv-normalizer
 make build
 # Binary is at ./bin/vault-csv-normalizer
 ```

From d05cd187a330d07a810790023a966bbee354350f Mon Sep 17 00:00:00 2001
From: Schuyler Bishop <schuyler@ibm.com>
Date: Wed, 27 May 2026 12:24:13 -0500
Subject: [PATCH 7/8] Add Terraform stub generation for unaliased entity
 clients

---
 README.md                              |  57 +++++++-
 cmd/vault-csv-normalizer/main.go       |  27 +++-
 internal/normalizer/normalizer.go      |  14 +-
 internal/normalizer/normalizer_test.go |  40 +-----
 internal/tfgen/tfgen.go                | 182 +++++++++++++++++++++++++
 5 files changed, 259 insertions(+), 61 deletions(-)
 create mode 100644 internal/tfgen/tfgen.go

diff --git a/README.md b/README.md
index 63dc853..cb8d1fe 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,13 @@ OPTIONS:
         both blank. This includes records with no auth mount (mount_path
         empty) and merged/deleted entities (mount_path present). Applied after
         all deduplication steps.
+  -generate-tf
+        Generate Terraform HCL stubs for entity clients with no alias in the
+        export. Requires --dedup-methods-per-file. A client is targeted when
+        entity_alias_name is blank and mount_accessor is non-empty. For each
+        such client, vault_identity_entity and vault_identity_entity_alias
+        resources are written to vault-aliases.tf. Mount accessors are emitted
+        as Terraform variables. Does not affect counts or summary output.
   -per-file
         Print a summary for each input file before the combined summary
   -debug
@@ -150,6 +157,9 @@ vault-csv-normalizer -f export.csv --debug
 # Remove abandoned clients from final totals
 vault-csv-normalizer -f export.csv --remove-abandoned-clients
 
+# Generate Terraform stubs for unaliased LDAP/OIDC clients
+vault-csv-normalizer -f export.csv --dedup-methods-per-file ldap,oidc --generate-tf
+
 # Same as above, with debug count output for removed rows
 vault-csv-normalizer -f export.csv --remove-abandoned-clients --debug
 
@@ -215,23 +225,56 @@ Vault can record the same human as multiple clients when they authenticate via
 different auth methods (e.g. LDAP in one session and OIDC in another).
 `--dedup-methods-per-file` collapses these into a single count within each file.
 
+### How deduplication works
+
+Each auth method stores a different value as the entity alias in Vault:
+
+| Auth method | What Vault stores as `entity_alias_name` |
+|---|---|
+| `ldap` | Bare username: `alice` |
+| `oidc` | Bare username (from `entity_alias_metadata.username`): `alice` |
+| `jwt` | Full email address: `alice@corp.com` |
+
+The tool normalizes all three to a common base by stripping the domain suffix
+(`alice@corp.com` → `alice`), then matches records within the same file that
+share the same normalized alias and belong to the same method group.
+
+**This only works when the same username (the part before any `@`) is used
+across all auth methods.** If `alice` logs in via LDAP as `alice` and via JWT as
+`alice@corp.com`, the normalization produces `alice` for both — they collapse.
+If the LDAP username and the JWT email prefix do not match (e.g. `asmith` vs
+`alice.smith@corp.com`), the records will not be collapsed.
+
+### Required conditions for cross-method dedup
+
+All of the following must be true for two records to be deduplicated:
+
+1. Both records are in the **same source file** — records across files are never collapsed.
+2. Both records' auth methods appear in the **same comma-separated list** passed to `--dedup-methods-per-file`. With `--dedup-methods-per-file ldap,oidc,jwt`, an LDAP and a JWT record can collapse. With `--dedup-methods-per-file ldap,oidc --dedup-methods-per-file jwt,saml`, an LDAP and a JWT record will never collapse — they are in separate groups.
+3. Both records have a **non-empty `entity_alias_name`** (or `entity_alias_metadata.username` for OIDC).
+4. The **normalized alias matches** — after stripping the domain suffix, the alias strings are identical.
+5. Neither record is a **PKI client** (`client_type=acme` or `mount_accessor` prefix `auth_cert`).
+
+If any condition is not met, both records pass through unchanged.
+
 ### Alias normalization
 
 `--dedup-methods-per-file` applies one normalization step before comparing:
 
-1. **Strip domain suffix** — everything from `@` onward is removed.
-   `alice@corp.com` → `alice`
+**Strip domain suffix** — everything from `@` onward is removed.
+`alice@corp.com` → `alice`
 
-Tier suffixes (`-t0`, `-t1`, `-t2`) are **not** stripped — `alice-t0` and
-`alice-t1` are treated as distinct identities within a file.
+This lets JWT records (which use full email addresses) match LDAP/OIDC records
+(which use bare usernames), provided the local part of the email is the same
+as the LDAP/OIDC username.
 
 ### Auth methods reference
 
 | `mount_type` / `auth_method` | Typical users | Notes |
 |---|---|---|
-| `ldap` | Humans | Aliases usually bare usernames (`alice`) or tiered (`alice-t0`) |
-| `oidc` | Humans | Aliases usually `username@domain.com` — strip domain to match LDAP |
-| `jwt` | Humans or services | May share aliases with LDAP/OIDC; include in group to collapse |
+| `ldap` | Humans | Aliases are bare usernames (`alice`) |
+| `oidc` | Humans | Aliases are bare usernames from `entity_alias_metadata.username` (`alice`) |
+| `jwt` | Humans or services | Aliases are full email addresses (`alice@corp.com`); domain is stripped to match LDAP/OIDC |
 | `approle` | Service accounts | Not human; not typically alias-deduped |
 | `kubernetes` | Service accounts | Not human; not typically alias-deduped |
 | `aws` / `gcp` | Service accounts | Not human; not typically alias-deduped |
diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index cd56b06..30132c5 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -12,6 +12,7 @@ import (
 	"github.com/vault-csv-normalizer/internal/normalizer"
 	"github.com/vault-csv-normalizer/internal/parser"
 	"github.com/vault-csv-normalizer/internal/renderer"
+	"github.com/vault-csv-normalizer/internal/tfgen"
 )
 
 // multiFlag allows a flag to be specified multiple times.
@@ -47,6 +48,7 @@ func main() {
 	var filterSinceFile = make(fileDateFlag)
 	var countPKI bool
 	var removeAbandonedClients bool
+	var generateTF bool
 	var listMethods bool
 	var debugMode bool
 	var perFile bool
@@ -61,6 +63,7 @@ func main() {
 	flag.BoolVar(&countPKI, "p", false, "Partition and report PKI/cert clients (client_type=acme or mount_accessor prefix auth_cert) separately")
 	flag.Var(&dedupMethodsPerFile, "dedup-methods-per-file", "Deduplicate by alias for the specified comma-separated auth methods, scoped to each input file independently. Records in different files are never collapsed against each other. Repeatable to define multiple groups.")
 	flag.BoolVar(&removeAbandonedClients, "remove-abandoned-clients", false, "Remove abandoned clients (blank entity_name and entity_alias_name) after deduplication. Includes records with no auth mount and merged/deleted entities.")
+	flag.BoolVar(&generateTF, "generate-tf", false, "Generate Terraform HCL stubs for entity clients with no alias. Requires --dedup-methods-per-file. Output written to vault-aliases.tf.")
 	flag.BoolVar(&listMethods, "list-methods", false, "Print every distinct auth method found in the input files (with record counts and alias coverage), then exit. Useful for deciding --dedup-methods-per-file groups.")
 	flag.BoolVar(&debugMode, "debug", false, "Print all records grouped by mount path")
 	flag.BoolVar(&perFile, "per-file", false, "Print a summary for each input file before the combined summary")
@@ -134,7 +137,7 @@ func main() {
 			for _, group := range groups {
 				r0 := group[0]
 				fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
-					normalizer.StripTierSuffix(normalizer.BaseAlias(r0.EntityAliasName)), filepath.Base(r0.Source))
+					normalizer.BaseAlias(r0.EntityAliasName), filepath.Base(r0.Source))
 				renderer.PrintTable(os.Stdout, group)
 			}
 			fmt.Fprintln(os.Stdout)
@@ -154,6 +157,9 @@ func main() {
 		fmt.Fprintln(os.Stdout, strings.Repeat("-", 70))
 	}
 
+	// Keep the post-dedup, pre-filter records for --generate-tf (slice header only, not a copy; relies on filters not mutating their input).
+	preFilterRecords := normalized
+
 	// Apply filters.
 	if filterNS != "" {
 		normalized = normalizer.FilterByNamespace(normalized, filterNS)
@@ -176,6 +182,23 @@ func main() {
 		os.Exit(1)
 	}
 
+	if generateTF {
+		if len(methodGroupsPerFile) == 0 {
+			fmt.Fprintln(os.Stderr, "warning: --generate-tf has no effect without --dedup-methods-per-file")
+		} else {
+			n, err := tfgen.GenerateTF(preFilterRecords, methodGroupsPerFile, "vault-aliases.tf")
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "error: --generate-tf: %v\n", err)
+				os.Exit(1)
+			}
+			if n == 0 {
+				fmt.Fprintln(os.Stdout, "generate-tf: no unaliased clients found in the specified method groups")
+			} else {
+				fmt.Fprintf(os.Stdout, "generate-tf: wrote %d entity stub(s) to vault-aliases.tf\n", n)
+			}
+		}
+	}
+
 	if debugMode {
 		// Group final (post-dedup) records by mount path.
 		var mountOrder []string
@@ -204,7 +227,7 @@ func main() {
 					if r.EntityAliasName == "" {
 						continue
 					}
-					norm := normalizer.StripTierSuffix(normalizer.BaseAlias(r.EntityAliasName))
+					norm := normalizer.BaseAlias(r.EntityAliasName)
 					aliasToIDs[norm] = append(aliasToIDs[norm], r.ClientID)
 				}
 				for alias, ids := range aliasToIDs {
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index b6cb8d0..3b4b491 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -162,8 +162,7 @@ func ParseTime(raw string) time.Time {
 
 // BaseAlias returns the portion of an entity alias name before the first '@'
 // character. If no '@' is present the full name is returned.
-// Example: "alice@corp.com" → "alice", "sbishop@hashicorp.com" → "sbishop",
-// "sbishop-t0" → "sbishop-t0".
+// Example: "alice@corp.com" → "alice", "sbishop@hashicorp.com" → "sbishop".
 func BaseAlias(name string) string {
 	for i, ch := range name {
 		if ch == '@' {
@@ -173,17 +172,6 @@ func BaseAlias(name string) string {
 	return name
 }
 
-// StripTierSuffix removes a trailing "-t0", "-t1", or "-t2" suffix from name.
-// Other suffixes are left unchanged.
-// Example: "alice-t0" → "alice", "bob-t2" → "bob", "carol-t3" → "carol-t3".
-func StripTierSuffix(name string) string {
-	n := len(name)
-	if n >= 3 && name[n-3] == '-' && name[n-2] == 't' && name[n-1] >= '0' && name[n-1] <= '2' {
-		return name[:n-3]
-	}
-	return name
-}
-
 // aliasKey is the deduplication key for alias-based dedup: one record is
 // allowed per (normalized alias, mount type) pair.
 type aliasKey struct {
diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go
index 6ebefa5..55c7069 100644
--- a/internal/normalizer/normalizer_test.go
+++ b/internal/normalizer/normalizer_test.go
@@ -241,7 +241,7 @@ func TestBaseAlias(t *testing.T) {
 		{"alice@corp.com", "alice"},
 		{"sbishop@hashicorp.com", "sbishop"},
 		{"abc@234", "abc"},
-		{"sbishop-t0", "sbishop-t0"}, // BaseAlias alone does not strip tier
+		{"sbishop-t0", "sbishop-t0"},
 		{"plain", "plain"},
 		{"", ""},
 		{"@leading", ""},
@@ -254,44 +254,6 @@ func TestBaseAlias(t *testing.T) {
 	}
 }
 
-func TestStripTierSuffix(t *testing.T) {
-	cases := []struct{ in, want string }{
-		{"alice-t0", "alice"},
-		{"alice-t1", "alice"},
-		{"alice-t2", "alice"},
-		{"alice-t3", "alice-t3"}, // only t0–t2 are stripped
-		{"alice-t10", "alice-t10"},
-		{"alice-T0", "alice-T0"}, // case-sensitive
-		{"alice", "alice"},
-		{"-t0", ""},  // degenerate: only the suffix
-		{"t0", "t0"}, // no hyphen
-		{"", ""},
-	}
-	for _, c := range cases {
-		got := StripTierSuffix(c.in)
-		if got != c.want {
-			t.Errorf("StripTierSuffix(%q) = %q, want %q", c.in, got, c.want)
-		}
-	}
-}
-
-func TestStripTierSuffix_AfterBaseAlias(t *testing.T) {
-	// Strip domain then tier suffix.
-	cases := []struct{ in, want string }{
-		{"alice-t0@corp.com", "alice"},
-		{"alice-t1@corp.com", "alice"},
-		{"alice@corp.com", "alice"},
-		{"alice-t0", "alice"},
-		{"alice", "alice"},
-	}
-	for _, c := range cases {
-		got := StripTierSuffix(BaseAlias(c.in))
-		if got != c.want {
-			t.Errorf("StripTierSuffix(BaseAlias(%q)) = %q, want %q", c.in, got, c.want)
-		}
-	}
-}
-
 // helpers for alias dedup tests
 func clientIDs(records []Record) []string {
 	ids := make([]string, len(records))
diff --git a/internal/tfgen/tfgen.go b/internal/tfgen/tfgen.go
new file mode 100644
index 0000000..ea8960d
--- /dev/null
+++ b/internal/tfgen/tfgen.go
@@ -0,0 +1,182 @@
+// Package tfgen generates Terraform HCL stubs for Vault entity clients that
+// have no entity alias in the activity export. These stubs create a
+// vault_identity_entity and a vault_identity_entity_alias for each such record.
+package tfgen
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/vault-csv-normalizer/internal/normalizer"
+)
+
+var petAdjectives = []string{
+	"amber", "bold", "calm", "dark", "eager", "fair", "glad", "hardy",
+	"ivory", "jolly", "keen", "lofty", "merry", "noble", "proud",
+	"quiet", "rapid", "silver", "tawny", "vivid", "warm", "young",
+}
+
+var petNouns = []string{
+	"bear", "crane", "deer", "eagle", "fox", "goose", "hawk", "ibis",
+	"jay", "kite", "lark", "mole", "newt", "otter", "panda", "quail",
+	"raven", "swift", "teal", "vole", "wren", "yak", "zebra",
+}
+
+// nextPetName picks the next unused (adj_noun) name, cycling through all
+// combinations. Falls back to "entity_N" if all 500+ combinations are used.
+func nextPetName(used map[string]struct{}, counter *int) string {
+	total := len(petAdjectives) * len(petNouns)
+	for i := 0; i < total; i++ {
+		idx := (*counter + i) % total
+		name := petAdjectives[idx/len(petNouns)] + "_" + petNouns[idx%len(petNouns)]
+		if _, ok := used[name]; !ok {
+			*counter = (idx + 1) % total
+			used[name] = struct{}{}
+			return name
+		}
+	}
+	*counter++
+	name := fmt.Sprintf("entity_%d", *counter)
+	used[name] = struct{}{}
+	return name
+}
+
+// sanitizeID converts a string to a valid Terraform identifier by lowercasing
+// and replacing non-alphanumeric characters with underscores.
+func sanitizeID(s string) string {
+	var b strings.Builder
+	for _, ch := range strings.ToLower(s) {
+		if (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') {
+			b.WriteRune(ch)
+		} else {
+			b.WriteByte('_')
+		}
+	}
+	result := b.String()
+	for strings.Contains(result, "__") {
+		result = strings.ReplaceAll(result, "__", "_")
+	}
+	return strings.Trim(result, "_")
+}
+
+func buildMethodGroupMap(groups [][]string) map[string]string {
+	m := make(map[string]string)
+	for _, g := range groups {
+		if len(g) == 0 {
+			continue
+		}
+		canonical := g[0]
+		for _, method := range g {
+			m[method] = canonical
+		}
+	}
+	return m
+}
+
+// GenerateTF scans records for method-group members that have a non-empty
+// mount_accessor but a blank entity_alias_name. These clients exist in Vault
+// without an entity alias and need one created. For each such record a
+// vault_identity_entity and vault_identity_entity_alias stub is written to
+// outputPath. Mount accessors are emitted as Terraform variables so the caller
+// can override them per environment. Returns the number of stubs written.
+func GenerateTF(records []normalizer.Record, groups [][]string, outputPath string) (int, error) {
+	groupMap := buildMethodGroupMap(groups)
+
+	var targets []normalizer.Record
+	for _, r := range records {
+		if r.EntityAliasName != "" {
+			continue // already aliased — skip
+		}
+		if r.MountAccessor == "" {
+			continue // no mount accessor — cannot create alias
+		}
+		if normalizer.IsPKIClient(r) {
+			continue // PKI clients are excluded from alias management
+		}
+		mt := r.MountType
+		if mt == "" {
+			mt = r.AuthMethod
+		}
+		if _, ok := groupMap[mt]; !ok {
+			continue // not in any configured method group
+		}
+		targets = append(targets, r)
+	}
+
+	if len(targets) == 0 {
+		return 0, nil
+	}
+
+	type mountInfo struct {
+		accessor  string
+		mountPath string
+		mountType string
+		varName   string
+	}
+
+	accessorSeen := make(map[string]*mountInfo)
+	var accessorOrder []string
+	for _, r := range targets {
+		if _, ok := accessorSeen[r.MountAccessor]; !ok {
+			mi := &mountInfo{
+				accessor:  r.MountAccessor,
+				mountPath: r.MountPath,
+				mountType: r.MountType,
+				varName:   "accessor_" + sanitizeID(r.MountAccessor),
+			}
+			accessorSeen[r.MountAccessor] = mi
+			accessorOrder = append(accessorOrder, r.MountAccessor)
+		}
+	}
+
+	var sb strings.Builder
+
+	sb.WriteString("# Generated by vault-csv-normalizer\n")
+	sb.WriteString("# These stubs represent entity clients with no alias in the Vault export.\n")
+	sb.WriteString("# Fill in the TODO values and verify entity names before applying.\n\n")
+
+	for _, acc := range accessorOrder {
+		mi := accessorSeen[acc]
+		desc := mi.mountPath
+		if mi.mountType != "" {
+			desc += " (" + mi.mountType + ")"
+		}
+		sb.WriteString(fmt.Sprintf("variable %q {\n", mi.varName))
+		sb.WriteString(fmt.Sprintf("  description = %q\n", "Mount accessor for "+strings.TrimSpace(desc)))
+		sb.WriteString("  type        = string\n")
+		sb.WriteString(fmt.Sprintf("  default     = %q\n", mi.accessor))
+		sb.WriteString("}\n\n")
+	}
+
+	used := make(map[string]struct{})
+	counter := 0
+
+	for _, r := range targets {
+		name := nextPetName(used, &counter)
+		mi := accessorSeen[r.MountAccessor]
+
+		sb.WriteString(fmt.Sprintf("# client_id: %s | source: %s | mount: %s\n",
+			r.ClientID, filepath.Base(r.Source), r.MountPath))
+
+		sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity\" %q {\n", name))
+		entityName := r.EntityName
+		if entityName == "" {
+			entityName = "TODO"
+		}
+		sb.WriteString(fmt.Sprintf("  name = %q\n", entityName))
+		sb.WriteString("}\n\n")
+
+		sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity_alias\" %q {\n", name))
+		sb.WriteString("  name           = \"TODO\" # alias identifier used by the auth method\n")
+		sb.WriteString(fmt.Sprintf("  mount_accessor = var.%s\n", mi.varName))
+		sb.WriteString(fmt.Sprintf("  canonical_id   = vault_identity_entity.%s.id\n", name))
+		sb.WriteString("}\n\n")
+	}
+
+	if err := os.WriteFile(outputPath, []byte(sb.String()), 0644); err != nil {
+		return 0, fmt.Errorf("writing %s: %w", outputPath, err)
+	}
+	return len(targets), nil
+}

From 09f076fc611299a29e86cbd9afa64fe1369aa13d Mon Sep 17 00:00:00 2001
From: Schuyler Bishop <schuyler@ibm.com>
Date: Fri, 29 May 2026 10:00:05 -0500
Subject: [PATCH 8/8] Refactor README and main logic for Vault CSV
 normalization; add tests for Terraform generation

---
 README.md                              |   7 +-
 cmd/vault-csv-normalizer/main.go       |  16 +-
 internal/normalizer/normalizer.go      |   7 -
 internal/normalizer/normalizer_test.go |  44 -----
 internal/tfgen/tfgen.go                | 163 +++++++++-------
 internal/tfgen/tfgen_test.go           | 252 +++++++++++++++++++++++++
 6 files changed, 356 insertions(+), 133 deletions(-)
 create mode 100644 internal/tfgen/tfgen_test.go

diff --git a/README.md b/README.md
index cb8d1fe..c0547ef 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,12 @@
-# vault-csv-normalizer
+# vault-csv-count
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
 
+> **Disclaimer:** This is an unofficial, community-provided tool. It is not
+> created, endorsed, or supported by HashiCorp or IBM. Use at your own risk.
+> No warranty is provided. For official Vault client counting guidance, refer
+> to the [HashiCorp Vault documentation](https://developer.hashicorp.com/vault/docs).
+
 A CLI tool that reads one or more **HashiCorp Vault client export CSV files**,
 normalizes their data (consistent column names, types, and values across Vault
 versions), and displays a summary of client counts by mount path and type.
diff --git a/cmd/vault-csv-normalizer/main.go b/cmd/vault-csv-normalizer/main.go
index 30132c5..3ae4a37 100644
--- a/cmd/vault-csv-normalizer/main.go
+++ b/cmd/vault-csv-normalizer/main.go
@@ -129,12 +129,13 @@ func main() {
 	}
 
 	preDedup := normalized
+	var aliasGroups [][]normalizer.Record
 	if len(methodGroupsPerFile) > 0 {
-		groups := normalizer.FindAliasDuplicatesForMethodsPerFile(preDedup, methodGroupsPerFile)
-		if len(groups) > 0 {
-			fmt.Fprintf(os.Stdout, "Per-file method-scoped alias duplicates found (%d group(s))\n", len(groups))
+		aliasGroups = normalizer.FindAliasDuplicatesForMethodsPerFile(preDedup, methodGroupsPerFile)
+		if len(aliasGroups) > 0 {
+			fmt.Fprintf(os.Stdout, "Per-file method-scoped alias duplicates found (%d group(s))\n", len(aliasGroups))
 			fmt.Fprintln(os.Stdout, "=====================================================")
-			for _, group := range groups {
+			for _, group := range aliasGroups {
 				r0 := group[0]
 				fmt.Fprintf(os.Stdout, "\nAlias group: %q  file: %s\n",
 					normalizer.BaseAlias(r0.EntityAliasName), filepath.Base(r0.Source))
@@ -157,9 +158,6 @@ func main() {
 		fmt.Fprintln(os.Stdout, strings.Repeat("-", 70))
 	}
 
-	// Snapshot post-dedup records before filters for --generate-tf.
-	preFilterRecords := normalized
-
 	// Apply filters.
 	if filterNS != "" {
 		normalized = normalizer.FilterByNamespace(normalized, filterNS)
@@ -186,13 +184,13 @@ func main() {
 		if len(methodGroupsPerFile) == 0 {
 			fmt.Fprintln(os.Stderr, "warning: --generate-tf has no effect without --dedup-methods-per-file")
 		} else {
-			n, err := tfgen.GenerateTF(preFilterRecords, methodGroupsPerFile, "vault-aliases.tf")
+			n, err := tfgen.GenerateTF(aliasGroups, "vault-aliases.tf")
 			if err != nil {
 				fmt.Fprintf(os.Stderr, "error: --generate-tf: %v\n", err)
 				os.Exit(1)
 			}
 			if n == 0 {
-				fmt.Fprintln(os.Stdout, "generate-tf: no unaliased clients found in the specified method groups")
+				fmt.Fprintln(os.Stdout, "generate-tf: no alias groups found — nothing to generate")
 			} else {
 				fmt.Fprintf(os.Stdout, "generate-tf: wrote %d entity stub(s) to vault-aliases.tf\n", n)
 			}
diff --git a/internal/normalizer/normalizer.go b/internal/normalizer/normalizer.go
index 3b4b491..65ecc7d 100644
--- a/internal/normalizer/normalizer.go
+++ b/internal/normalizer/normalizer.go
@@ -172,13 +172,6 @@ func BaseAlias(name string) string {
 	return name
 }
 
-// aliasKey is the deduplication key for alias-based dedup: one record is
-// allowed per (normalized alias, mount type) pair.
-type aliasKey struct {
-	base      string
-	mountType string
-}
-
 // buildMethodGroupMap converts a list of groups (each a slice of mount-type
 // strings) into a map from every member to the group's canonical value (the
 // first element of the group). Methods not present in any group are absent
diff --git a/internal/normalizer/normalizer_test.go b/internal/normalizer/normalizer_test.go
index 55c7069..c70ee53 100644
--- a/internal/normalizer/normalizer_test.go
+++ b/internal/normalizer/normalizer_test.go
@@ -1,7 +1,6 @@
 package normalizer
 
 import (
-	"strings"
 	"testing"
 	"time"
 
@@ -144,49 +143,6 @@ func TestFilterByClientType(t *testing.T) {
 	}
 }
 
-func TestFilterAbandonedClients(t *testing.T) {
-	records := []Record{
-		// removed as merged/deleted: mount path present
-		{ClientID: "drop-merged-1", EntityName: "", EntityAliasName: "", MountPath: "auth/ldap/", MountType: "ldap"},
-		// removed as merged/deleted: mount path present even if mount type is blank
-		{ClientID: "drop-merged-2", EntityName: "", EntityAliasName: "", MountPath: "auth/oidc/", MountType: ""},
-		// removed as no mount: mount path missing
-		{ClientID: "drop-nomount-1", EntityName: "", EntityAliasName: "", MountPath: "", MountType: "ldap"},
-		// removed as merged/deleted PKI (auth_cert accessor, mount present)
-		{ClientID: "drop-merged-pki-1", EntityName: "", EntityAliasName: "", MountPath: "auth/cert/", MountType: "cert", MountAccessor: "auth_cert_abc123"},
-		// removed as no-mount PKI (auth_cert accessor, mount missing)
-		{ClientID: "drop-nomount-pki-1", EntityName: "", EntityAliasName: "", MountPath: "", MountType: "cert", MountAccessor: "auth_cert_xyz789"},
-		// keep: entity name present
-		{ClientID: "keep-3", EntityName: "Alice", EntityAliasName: "", MountPath: "auth/ldap/", MountType: "ldap"},
-		// keep: entity alias present
-		{ClientID: "keep-4", EntityName: "", EntityAliasName: "alice", MountPath: "auth/ldap/", MountType: "ldap"},
-	}
-
-	out, counts := FilterAbandonedClients(records)
-	if counts.NoMount != 2 {
-		t.Fatalf("expected NoMount=2, got %d", counts.NoMount)
-	}
-	if counts.NoMountPKI != 1 {
-		t.Fatalf("expected NoMountPKI=1, got %d", counts.NoMountPKI)
-	}
-	if counts.MergedDeleted != 3 {
-		t.Fatalf("expected MergedDeleted=3, got %d", counts.MergedDeleted)
-	}
-	if counts.MergedDeletedPKI != 1 {
-		t.Fatalf("expected MergedDeletedPKI=1, got %d", counts.MergedDeletedPKI)
-	}
-	if counts.Total() != 5 {
-		t.Fatalf("expected Total=5, got %d", counts.Total())
-	}
-	if len(out) != 2 {
-		t.Fatalf("expected 2 records after filter, got %d", len(out))
-	}
-	for _, r := range out {
-		if strings.HasPrefix(r.ClientID, "drop-") {
-			t.Fatal("drop-* records should have been removed")
-		}
-	}
-}
 
 func TestFilterSince(t *testing.T) {
 	records := []Record{
diff --git a/internal/tfgen/tfgen.go b/internal/tfgen/tfgen.go
index ea8960d..a0abdb7 100644
--- a/internal/tfgen/tfgen.go
+++ b/internal/tfgen/tfgen.go
@@ -1,6 +1,7 @@
-// Package tfgen generates Terraform HCL stubs for Vault entity clients that
-// have no entity alias in the activity export. These stubs create a
-// vault_identity_entity and a vault_identity_entity_alias for each such record.
+// Package tfgen generates Terraform HCL to consolidate per-auth-method Vault
+// client records into a single entity with multiple aliases. Each duplicate
+// group found by --dedup-methods-per-file becomes one vault_identity_entity
+// and one vault_identity_entity_alias per auth method in that group.
 package tfgen
 
 import (
@@ -24,8 +25,7 @@ var petNouns = []string{
 	"raven", "swift", "teal", "vole", "wren", "yak", "zebra",
 }
 
-// nextPetName picks the next unused (adj_noun) name, cycling through all
-// combinations. Falls back to "entity_N" if all 500+ combinations are used.
+// nextPetName picks the next unused adj_noun name from the wordlists, falling back to "entity_N" once every combination is taken.
 func nextPetName(used map[string]struct{}, counter *int) string {
 	total := len(petAdjectives) * len(petNouns)
 	for i := 0; i < total; i++ {
@@ -43,8 +43,7 @@ func nextPetName(used map[string]struct{}, counter *int) string {
 	return name
 }
 
-// sanitizeID converts a string to a valid Terraform identifier by lowercasing
-// and replacing non-alphanumeric characters with underscores.
+// sanitizeID converts a string to a valid Terraform identifier.
 func sanitizeID(s string) string {
 	var b strings.Builder
 	for _, ch := range strings.ToLower(s) {
@@ -61,51 +60,25 @@ func sanitizeID(s string) string {
 	return strings.Trim(result, "_")
 }
 
-func buildMethodGroupMap(groups [][]string) map[string]string {
-	m := make(map[string]string)
-	for _, g := range groups {
-		if len(g) == 0 {
-			continue
-		}
-		canonical := g[0]
-		for _, method := range g {
-			m[method] = canonical
-		}
+// effectiveAlias returns the human-readable alias for a record. For OIDC,
+// entity_alias_metadata_username holds the username; entity_alias_name may be
+// a subject identifier that doesn't match other methods.
+func effectiveAlias(r normalizer.Record) string {
+	if (r.MountType == "oidc" || r.AuthMethod == "oidc") && r.EntityAliasMetadataUsername != "" {
+		return r.EntityAliasMetadataUsername
 	}
-	return m
+	return r.EntityAliasName
 }
 
-// GenerateTF scans records for method-group members that have a non-empty
-// mount_accessor but a blank entity_alias_name. These clients exist in Vault
-// without an entity alias and need one created. For each such record a
-// vault_identity_entity and vault_identity_entity_alias stub is written to
-// outputPath. Mount accessors are emitted as Terraform variables so the caller
-// can override them per environment. Returns the number of stubs written.
-func GenerateTF(records []normalizer.Record, groups [][]string, outputPath string) (int, error) {
-	groupMap := buildMethodGroupMap(groups)
-
-	var targets []normalizer.Record
-	for _, r := range records {
-		if r.EntityAliasName != "" {
-			continue // already aliased — skip
-		}
-		if r.MountAccessor == "" {
-			continue // no mount accessor — cannot create alias
-		}
-		if normalizer.IsPKIClient(r) {
-			continue // PKI clients are excluded from alias management
-		}
-		mt := r.MountType
-		if mt == "" {
-			mt = r.AuthMethod
-		}
-		if _, ok := groupMap[mt]; !ok {
-			continue // not in any configured method group
-		}
-		targets = append(targets, r)
-	}
-
-	if len(targets) == 0 {
+// GenerateTF writes Terraform HCL to outputPath for each alias duplicate group.
+// groups is the output of normalizer.FindAliasDuplicatesForMethodsPerFile — each
+// inner slice is a set of records that represent the same person authenticated via
+// different auth methods. For each group, one vault_identity_entity resource and
+// one vault_identity_entity_alias per record are generated, consolidating the
+// separate per-method client records into a single Vault entity.
+// Returns the number of groups written (one entity stub each). No file is written when groups is empty; every inner slice must be non-empty.
+func GenerateTF(groups [][]normalizer.Record, outputPath string) (int, error) {
+	if len(groups) == 0 {
 		return 0, nil
 	}
 
@@ -116,10 +89,14 @@ func GenerateTF(records []normalizer.Record, groups [][]string, outputPath strin
 		varName   string
 	}
 
+	// Collect unique mount accessors across all groups.
 	accessorSeen := make(map[string]*mountInfo)
 	var accessorOrder []string
-	for _, r := range targets {
-		if _, ok := accessorSeen[r.MountAccessor]; !ok {
+	for _, group := range groups {
+		for _, r := range group {
+			if r.MountAccessor == "" || accessorSeen[r.MountAccessor] != nil {
+				continue
+			}
 			mi := &mountInfo{
 				accessor:  r.MountAccessor,
 				mountPath: r.MountPath,
@@ -134,8 +111,9 @@ func GenerateTF(records []normalizer.Record, groups [][]string, outputPath strin
 	var sb strings.Builder
 
 	sb.WriteString("# Generated by vault-csv-normalizer\n")
-	sb.WriteString("# These stubs represent entity clients with no alias in the Vault export.\n")
-	sb.WriteString("# Fill in the TODO values and verify entity names before applying.\n\n")
+	sb.WriteString("# Each entity block consolidates records that represent the same person\n")
+	sb.WriteString("# authenticated via different auth methods within a single billing period.\n")
+	sb.WriteString("# Verify names before applying.\n\n")
 
 	for _, acc := range accessorOrder {
 		mi := accessorSeen[acc]
@@ -150,33 +128,74 @@ func GenerateTF(records []normalizer.Record, groups [][]string, outputPath strin
 		sb.WriteString("}\n\n")
 	}
 
+	// Group duplicate groups by source file base name, preserving order of first appearance.
+	var fileOrder []string
+	byFile := make(map[string][][]normalizer.Record)
+	for _, group := range groups {
+		source := filepath.Base(group[0].Source)
+		if _, ok := byFile[source]; !ok {
+			fileOrder = append(fileOrder, source)
+		}
+		byFile[source] = append(byFile[source], group)
+	}
+
 	used := make(map[string]struct{})
 	counter := 0
 
-	for _, r := range targets {
-		name := nextPetName(used, &counter)
-		mi := accessorSeen[r.MountAccessor]
-
-		sb.WriteString(fmt.Sprintf("# client_id: %s | source: %s | mount: %s\n",
-			r.ClientID, filepath.Base(r.Source), r.MountPath))
+	for _, source := range fileOrder {
+		fileGroups := byFile[source]
+		divider := strings.Repeat("#", 60)
+		sb.WriteString(fmt.Sprintf("%s\n# Source: %s (%d alias group(s))\n%s\n\n",
+			divider, source, len(fileGroups), divider))
+
+		for _, group := range fileGroups {
+			r0 := group[0]
+			entityName := normalizer.BaseAlias(effectiveAlias(r0))
+
+			methods := make([]string, 0, len(group))
+			for _, r := range group {
+				mt := r.MountType
+				if mt == "" {
+					mt = r.AuthMethod
+				}
+				methods = append(methods, mt)
+			}
 
-		sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity\" %q {\n", name))
-		entityName := r.EntityName
-		if entityName == "" {
-			entityName = "TODO"
+			petname := nextPetName(used, &counter)
+
+			sb.WriteString(fmt.Sprintf("# alias: %s | methods: %s\n",
+				entityName, strings.Join(methods, ", ")))
+
+			sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity\" %q {\n", petname))
+			sb.WriteString(fmt.Sprintf("  name = %q\n", entityName))
+			sb.WriteString("}\n\n")
+
+			for i, r := range group {
+				mt := r.MountType
+				if mt == "" {
+					mt = r.AuthMethod
+				}
+				aliasName := r.EntityAliasName
+				if aliasName == "" {
+					aliasName = "TODO"
+				}
+				aliasResource := fmt.Sprintf("%s_%d", petname, i)
+
+				sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity_alias\" %q {\n", aliasResource))
+				sb.WriteString(fmt.Sprintf("  name           = %q # %s\n", aliasName, mt))
+				if mi := accessorSeen[r.MountAccessor]; mi != nil {
+					sb.WriteString(fmt.Sprintf("  mount_accessor = var.%s\n", mi.varName))
+				} else {
+					sb.WriteString("  mount_accessor = \"TODO\" # mount_accessor not in export\n")
+				}
+				sb.WriteString(fmt.Sprintf("  canonical_id   = vault_identity_entity.%s.id\n", petname))
+				sb.WriteString("}\n\n")
+			}
 		}
-		sb.WriteString(fmt.Sprintf("  name = %q\n", entityName))
-		sb.WriteString("}\n\n")
-
-		sb.WriteString(fmt.Sprintf("resource \"vault_identity_entity_alias\" %q {\n", name))
-		sb.WriteString("  name           = \"TODO\" # alias identifier used by the auth method\n")
-		sb.WriteString(fmt.Sprintf("  mount_accessor = var.%s\n", mi.varName))
-		sb.WriteString(fmt.Sprintf("  canonical_id   = vault_identity_entity.%s.id\n", name))
-		sb.WriteString("}\n\n")
 	}
 
 	if err := os.WriteFile(outputPath, []byte(sb.String()), 0644); err != nil {
 		return 0, fmt.Errorf("writing %s: %w", outputPath, err)
 	}
-	return len(targets), nil
+	return len(groups), nil
 }
diff --git a/internal/tfgen/tfgen_test.go b/internal/tfgen/tfgen_test.go
new file mode 100644
index 0000000..84edb44
--- /dev/null
+++ b/internal/tfgen/tfgen_test.go
@@ -0,0 +1,252 @@
+package tfgen
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/vault-csv-normalizer/internal/normalizer"
+)
+
+func TestGenerateTF_EmptyGroups(t *testing.T) {
+	out := filepath.Join(t.TempDir(), "out.tf")
+	n, err := GenerateTF(nil, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("expected 0 stubs, got %d", n)
+	}
+	if _, err := os.Stat(out); !os.IsNotExist(err) {
+		t.Error("expected no file to be written for empty groups")
+	}
+}
+
+func TestGenerateTF_SingleGroup(t *testing.T) {
+	groups := [][]normalizer.Record{
+		{
+			{
+				ClientID:      "ldap-001",
+				Source:        "jan.csv",
+				MountAccessor: "auth_ldap_abc",
+				MountPath:     "auth/ldap/",
+				MountType:     "ldap",
+				ClientType:    "entity",
+				EntityAliasName: "alice",
+			},
+			{
+				ClientID:      "oidc-001",
+				Source:        "jan.csv",
+				MountAccessor: "auth_oidc_xyz",
+				MountPath:     "auth/oidc/",
+				MountType:     "oidc",
+				ClientType:    "entity",
+				EntityAliasName: "alice@corp.com",
+			},
+		},
+	}
+
+	out := filepath.Join(t.TempDir(), "out.tf")
+	n, err := GenerateTF(groups, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n != 1 {
+		t.Errorf("expected 1 stub, got %d", n)
+	}
+
+	content, err := os.ReadFile(out)
+	if err != nil {
+		t.Fatalf("reading output: %v", err)
+	}
+	tf := string(content)
+
+	// One entity resource
+	if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 1 {
+		t.Errorf("expected 1 vault_identity_entity resource, got %d", count)
+	}
+	// Two alias resources (one per record in the group)
+	if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 2 {
+		t.Errorf("expected 2 vault_identity_entity_alias resources, got %d", count)
+	}
+	// Entity name uses base alias (no domain)
+	if !strings.Contains(tf, `name = "alice"`) {
+		t.Error("expected entity name to be the base alias \"alice\"")
+	}
+	// LDAP alias name preserved as-is
+	if !strings.Contains(tf, `name           = "alice" # ldap`) {
+		t.Error("expected LDAP alias name \"alice\"")
+	}
+	// OIDC alias name preserved as-is (full email)
+	if !strings.Contains(tf, `name           = "alice@corp.com" # oidc`) {
+		t.Error("expected OIDC alias name \"alice@corp.com\"")
+	}
+	// Variables declared for both mount accessors
+	if !strings.Contains(tf, `variable "accessor_auth_ldap_abc"`) {
+		t.Error("expected variable for auth_ldap_abc")
+	}
+	if !strings.Contains(tf, `variable "accessor_auth_oidc_xyz"`) {
+		t.Error("expected variable for auth_oidc_xyz")
+	}
+	// canonical_id references the entity resource
+	if !strings.Contains(tf, "vault_identity_entity.") {
+		t.Error("expected canonical_id referencing vault_identity_entity")
+	}
+}
+
+func TestGenerateTF_MultipleGroups(t *testing.T) {
+	groups := [][]normalizer.Record{
+		{
+			{ClientID: "ldap-001", Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"},
+			{ClientID: "oidc-001", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"},
+		},
+		{
+			{ClientID: "ldap-002", Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "bob"},
+			{ClientID: "oidc-002", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "bob@corp.com"},
+		},
+	}
+
+	out := filepath.Join(t.TempDir(), "out.tf")
+	n, err := GenerateTF(groups, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n != 2 {
+		t.Errorf("expected 2 stubs, got %d", n)
+	}
+
+	content, _ := os.ReadFile(out)
+	tf := string(content)
+
+	if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 2 {
+		t.Errorf("expected 2 vault_identity_entity resources, got %d", count)
+	}
+	if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 4 {
+		t.Errorf("expected 4 vault_identity_entity_alias resources, got %d", count)
+	}
+	// Shared mount accessors declared only once each
+	if count := strings.Count(tf, `variable "accessor_auth_ldap_abc"`); count != 1 {
+		t.Errorf("expected mount accessor variable declared once, got %d", count)
+	}
+	if count := strings.Count(tf, `variable "accessor_auth_oidc_xyz"`); count != 1 {
+		t.Errorf("expected mount accessor variable declared once, got %d", count)
+	}
+}
+
+func TestGenerateTF_PetnamesAreUnique(t *testing.T) {
+	// Build enough groups to exercise multiple petname assignments.
+	aliases := []string{"alice", "bob", "carol", "dave", "eve"}
+	groups := make([][]normalizer.Record, len(aliases))
+	for i, alias := range aliases {
+		groups[i] = []normalizer.Record{
+			{ClientID: "ldap-" + alias, Source: "jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: alias},
+			{ClientID: "oidc-" + alias, Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: alias + "@corp.com"},
+		}
+	}
+
+	out := filepath.Join(t.TempDir(), "out.tf")
+	_, err := GenerateTF(groups, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	content, _ := os.ReadFile(out)
+	tf := string(content)
+
+	// Extract resource names and verify uniqueness.
+	seen := make(map[string]int)
+	for _, line := range strings.Split(tf, "\n") {
+		line = strings.TrimSpace(line)
+		if strings.HasPrefix(line, "resource \"vault_identity_entity\" ") {
+			name := strings.Trim(strings.Fields(line)[2], `"{ `)
+			seen[name]++
+		}
+	}
+	for name, count := range seen {
+		if count > 1 {
+			t.Errorf("petname %q used %d times — names must be unique", name, count)
+		}
+	}
+}
+
+func TestGenerateTF_GroupedByFile(t *testing.T) {
+	groups := [][]normalizer.Record{
+		// jan.csv — alice
+		{
+			{ClientID: "ldap-001", Source: "/data/jan.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"},
+			{ClientID: "oidc-001", Source: "/data/jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"},
+		},
+		// feb.csv — alice (same person, different file)
+		{
+			{ClientID: "ldap-002", Source: "/data/feb.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"},
+			{ClientID: "oidc-002", Source: "/data/feb.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"},
+		},
+		// feb.csv — bob (second group in the same file)
+		{
+			{ClientID: "ldap-003", Source: "/data/feb.csv", MountAccessor: "auth_ldap_abc", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "bob"},
+			{ClientID: "oidc-003", Source: "/data/feb.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "bob@corp.com"},
+		},
+	}
+
+	out := filepath.Join(t.TempDir(), "out.tf")
+	n, err := GenerateTF(groups, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n != 3 {
+		t.Errorf("expected 3 stubs, got %d", n)
+	}
+
+	content, _ := os.ReadFile(out)
+	tf := string(content)
+
+	// File headers present for both source files.
+	if !strings.Contains(tf, "Source: jan.csv") {
+		t.Error("expected file header for jan.csv")
+	}
+	if !strings.Contains(tf, "Source: feb.csv") {
+		t.Error("expected file header for feb.csv")
+	}
+	// jan.csv header shows 1 group, feb.csv header shows 2 groups.
+	if !strings.Contains(tf, "jan.csv (1 alias group(s))") {
+		t.Error("expected jan.csv to show 1 alias group")
+	}
+	if !strings.Contains(tf, "feb.csv (2 alias group(s))") {
+		t.Error("expected feb.csv to show 2 alias groups")
+	}
+	// jan.csv header appears before feb.csv header.
+	if strings.Index(tf, "jan.csv") > strings.Index(tf, "feb.csv") {
+		t.Error("expected jan.csv section before feb.csv section")
+	}
+	// Total: 3 entities, 6 aliases.
+	if count := strings.Count(tf, "resource \"vault_identity_entity\""); count != 3 {
+		t.Errorf("expected 3 vault_identity_entity resources, got %d", count)
+	}
+	if count := strings.Count(tf, "resource \"vault_identity_entity_alias\""); count != 6 {
+		t.Errorf("expected 6 vault_identity_entity_alias resources, got %d", count)
+	}
+}
+
+func TestGenerateTF_MissingMountAccessor(t *testing.T) {
+	groups := [][]normalizer.Record{
+		{
+			{ClientID: "ldap-001", Source: "jan.csv", MountAccessor: "", MountPath: "auth/ldap/", MountType: "ldap", EntityAliasName: "alice"},
+			{ClientID: "oidc-001", Source: "jan.csv", MountAccessor: "auth_oidc_xyz", MountPath: "auth/oidc/", MountType: "oidc", EntityAliasName: "alice@corp.com"},
+		},
+	}
+
+	out := filepath.Join(t.TempDir(), "out.tf")
+	_, err := GenerateTF(groups, out)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	content, _ := os.ReadFile(out)
+	tf := string(content)
+
+	// Record with no mount_accessor gets a TODO placeholder, not a var reference.
+	if !strings.Contains(tf, `mount_accessor = "TODO"`) {
+		t.Error("expected TODO placeholder for missing mount_accessor")
+	}
+}