Skip to content

Commit ac20180

Browse files
committed
decrease buckets number, handle getMore case
1 parent d380041 commit ac20180

File tree

4 files changed

+81
-11
lines changed

4 files changed

+81
-11
lines changed

docs/METRICS.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Duration of HTTP requests in seconds, labeled by:
5555
- `route` - Request route/path
5656
- `status_code` - HTTP status code
5757

58-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
58+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
5959

6060
#### http_requests_total (Counter)
6161

@@ -74,7 +74,7 @@ Labels:
7474
- `operation_name` - Name of the GraphQL operation
7575
- `operation_type` - Type of operation (query, mutation, subscription)
7676

77-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
77+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
7878

7979
**Purpose**: Identify slow API operations (P95/P99 latency).
8080

@@ -97,25 +97,34 @@ Labels:
9797
- `field_name` - Field name being resolved
9898
- `operation_name` - Name of the GraphQL operation
9999

100-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5 seconds
100+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5 seconds
101101

102102
**Purpose**: Find slow or CPU-intensive resolvers that degrade overall performance.
103103

104104
### MongoDB Metrics
105105

106106
#### hawk_mongo_command_duration_seconds (Histogram)
107107

108-
Histogram of MongoDB command duration by command, collection, and database.
108+
Histogram of MongoDB command duration by command, collection family, and database.
109109

110110
Labels:
111111
- `command` - MongoDB command name (find, insert, update, etc.)
112-
- `collection` - Collection name
112+
- `collection_family` - Collection family name (extracted from dynamic collection names to reduce cardinality)
113113
- `db` - Database name
114114

115-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
115+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
116116

117117
**Purpose**: Detect slow queries and high-latency collections.
118118

119+
**Note on Collection Families**: To reduce metric cardinality, dynamic collection names are grouped into families. For example:
120+
- `events:projectId` → `events`
121+
- `dailyEvents:projectId` → `dailyEvents`
122+
- `repetitions:projectId` → `repetitions`
123+
- `membership:userId` → `membership`
124+
- `team:workspaceId` → `team`
125+
126+
This prevents metric explosion when dealing with thousands of projects, users, or workspaces, while still providing meaningful insights into collection performance patterns.
127+
119128
#### hawk_mongo_command_errors_total (Counter)
120129

121130
Counter of failed MongoDB commands grouped by command and error code.
@@ -173,6 +182,7 @@ The metrics implementation uses the `prom-client` library and consists of:
173182
- Implements MongoDB command monitoring
174183
- Tracks command duration and errors
175184
- Uses MongoDB's command monitoring events
185+
- Extracts collection families from dynamic collection names to reduce cardinality
176186

177187
4. **Integration** (`src/index.ts`, `src/mongo.ts`):
178188
- Adds GraphQL metrics plugin to Apollo Server

src/metrics/graphql.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export const gqlOperationDuration = new client.Histogram({
1010
name: 'hawk_gql_operation_duration_seconds',
1111
help: 'Histogram of total GraphQL operation duration by operation name and type',
1212
labelNames: ['operation_name', 'operation_type'],
13-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
13+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
1414
});
1515

1616
/**
@@ -31,7 +31,7 @@ export const gqlResolverDuration = new client.Histogram({
3131
name: 'hawk_gql_resolver_duration_seconds',
3232
help: 'Histogram of resolver execution time per type, field, and operation',
3333
labelNames: ['type_name', 'field_name', 'operation_name'],
34-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5],
34+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
3535
});
3636

3737
/**

src/metrics/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ const httpRequestDuration = new client.Histogram({
2121
name: 'http_request_duration_seconds',
2222
help: 'Duration of HTTP requests in seconds',
2323
labelNames: ['method', 'route', 'status_code'],
24-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
24+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
2525
registers: [ register ],
2626
});
2727

src/metrics/mongodb.ts

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export const mongoCommandDuration = new promClient.Histogram({
99
name: 'hawk_mongo_command_duration_seconds',
1010
help: 'Histogram of MongoDB command duration by command, collection family, and db',
1111
labelNames: ['command', 'collection_family', 'db'],
12-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
12+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
1313
});
1414

1515
/**
@@ -22,6 +22,64 @@ export const mongoCommandErrors = new promClient.Counter({
2222
labelNames: ['command', 'error_code'],
2323
});
2424

25+
/**
 * Extract the raw collection identifier from a MongoDB command document.
 *
 * Most commands store the collection name under the key equal to the command
 * name (e.g. `{ find: "users" }`), while `getMore` keeps the cursor id under
 * `getMore` and the collection name under the separate `collection` field.
 *
 * @param command - MongoDB command object; anything that is not a non-null object yields null
 * @param commandName - Name of the command (find, insert, getMore, etc.)
 * @returns Raw (not yet normalized) collection identifier, or null when it is absent or falsy
 */
function extractCollectionFromCommand(command: unknown, commandName: string): unknown {
  // Only index into real objects; primitives must never be probed for properties
  if (typeof command !== 'object' || command === null) {
    return null;
  }

  const fields = command as Record<string, unknown>;

  // getMore stores the collection under `collection`; other commands use their own name as the key
  const collectionKey = commandName === 'getMore' ? 'collection' : commandName;

  // Falsy values (missing key, empty string, 0) are reported as "no collection"
  return fields[collectionKey] || null;
}
48+
49+
/**
 * Normalize a raw collection value to a string usable as a metric label.
 *
 * Non-empty strings are returned unchanged. Non-array objects exposing
 * `toString` are stringified, and the result is used only when it is a
 * non-empty string that is not a generic `[object ...]` representation.
 * Everything else (falsy values, numbers, arrays, failed conversions)
 * collapses to `'unknown'`.
 *
 * @param collection - Collection value from MongoDB command
 * @returns Normalized string or 'unknown'
 */
function normalizeCollectionName(collection: unknown): string {
  const fallback = 'unknown';

  // Handle string values directly; an empty string carries no information
  if (typeof collection === 'string') {
    return collection === '' ? fallback : collection;
  }

  /*
   * Arrays are never collection names (e.g. `killAllSessions: []`); stringifying them
   * would produce empty or comma-joined label values
   */
  if (typeof collection !== 'object' || collection === null || Array.isArray(collection)) {
    return fallback;
  }

  // Objects without a reachable toString (e.g. Object.create(null)) cannot be stringified safely
  if (!('toString' in collection)) {
    return fallback;
  }

  try {
    const str = String(collection);

    // Skip empty results and generic object representations like [object Object]
    if (str !== '' && !str.startsWith('[object')) {
      return str;
    }
  } catch (e) {
    // Conversion failures are logged and then treated as an unknown collection
    console.error('Error normalizing collection name', e);
  }

  return fallback;
}
82+
2583
/**
2684
* Extract collection family from full collection name
2785
* Reduces cardinality by grouping dynamic collections
@@ -65,8 +123,10 @@ export function setupMongoMetrics(client: MongoClient): void {
65123
const metadataKey = `${event.requestId}`;
66124

67125
// Extract collection name from the command
68-
const collection = event.command ? ((event.command)[event.commandName] || 'unknown') : 'unknown';
126+
const collectionRaw = extractCollectionFromCommand(event.command, event.commandName);
127+
const collection = normalizeCollectionName(collectionRaw);
69128
const collectionFamily = getCollectionFamily(collection);
129+
70130
const db = event.databaseName || 'unknown';
71131

72132
// eslint-disable-next-line @typescript-eslint/no-explicit-any

0 commit comments

Comments
 (0)