Skip to content

Commit e4e2008

Browse files
authored
feat(aws): add AWS OpenSearch Serverless (AOSS) support (#213)
* init aoss support Signed-off-by: ps48 <pshenoy36@gmail.com> * remove mv2 lock Signed-off-by: ps48 <pshenoy36@gmail.com> * add support for serverless and fix otel demo flags Signed-off-by: ps48 <pshenoy36@gmail.com> * delete mem files Signed-off-by: ps48 <pshenoy36@gmail.com> * update index patterns Signed-off-by: ps48 <pshenoy36@gmail.com> * resolve comments Signed-off-by: ps48 <pshenoy36@gmail.com> --------- Signed-off-by: ps48 <pshenoy36@gmail.com>
1 parent cdda68c commit e4e2008

21 files changed

Lines changed: 1066 additions & 103 deletions

.env

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,5 +225,9 @@ JAEGER_HOST=jaeger
225225
JAEGER_UI_PORT=16686
226226
JAEGER_GRPC_PORT=4317
227227

228+
# Telemetry Docs (referenced by frontend-proxy envoy template)
229+
TELEMETRY_DOCS_HOST=otel-collector
230+
TELEMETRY_DOCS_PORT=4318
231+
228232
# Java Options (workaround for OSX JDK bug)
229233
_JAVA_OPTIONS=

aws/cdk/bin/app.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,17 @@ const env = {
1111
region: process.env.CDK_DEFAULT_REGION,
1212
};
1313

14-
// Slow-changing infra: OpenSearch domain, AMP workspace, DQS data source
14+
// Which OpenSearch flavour to deploy, read from the CDK context key
// `opensearchType`. A missing or empty value keeps the previous default of
// 'managed'. Any other value is rejected up front: the visible consumers only
// compare against 'serverless', so a typo such as 'Serverless' would otherwise
// silently provision a managed domain instead of failing.
const opensearchTypeContext: unknown = app.node.tryGetContext('opensearchType');
if (
  opensearchTypeContext &&
  opensearchTypeContext !== 'managed' &&
  opensearchTypeContext !== 'serverless'
) {
  throw new Error(
    `Invalid context value opensearchType=${JSON.stringify(opensearchTypeContext)}; expected 'managed' or 'serverless'.`,
  );
}
const opensearchType: 'managed' | 'serverless' =
  opensearchTypeContext === 'serverless' ? 'serverless' : 'managed';
15+
16+
// Slow-changing infra: OpenSearch domain/collection, AMP workspace, DQS data source
1517
const infra = new InfraStack(app, 'ObsInfra', {
1618
env,
17-
osInstanceType: 'r6g.large.search',
18-
osInstanceCount: 1,
19-
osVolumeSize: 100,
19+
opensearchType,
20+
...(opensearchType !== 'serverless' && {
21+
osInstanceType: 'r6g.large.search',
22+
osInstanceCount: 1,
23+
osVolumeSize: 100,
24+
}),
2025
});
2126

2227
// Fast-iteration stack: FGAC, OSIS pipeline, OpenSearch App, UI init

aws/cdk/lib/demo-workload.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ export class DemoWorkload extends Construct {
8686
'git clone --depth 1 https://github.com/opensearch-project/observability-stack.git /opt/obs-stack',
8787
'cd /opt/obs-stack',
8888
'',
89+
'# Patch otel-demo frontend-proxy: upstream envoy template references',
90+
'# ${TELEMETRY_DOCS_HOST}/${TELEMETRY_DOCS_PORT} but those aren\'t wired',
91+
'# through compose, so envoy bootstraps with an empty socket address and',
92+
'# crash-loops. Inject the vars and forward them into the service.',
93+
'if ! grep -q "^TELEMETRY_DOCS_HOST=" .env; then',
94+
' printf "\\nTELEMETRY_DOCS_HOST=otel-collector\\nTELEMETRY_DOCS_PORT=4318\\n" >> .env',
95+
'fi',
96+
'if ! grep -q "TELEMETRY_DOCS_HOST" docker-compose.otel-demo.yml; then',
97+
' sed -i "/^ - FLAGD_UI_PORT$/a\\ - TELEMETRY_DOCS_HOST\\n - TELEMETRY_DOCS_PORT" docker-compose.otel-demo.yml',
98+
'fi',
99+
'',
89100
'cat > docker-compose/otel-collector/config.yaml << \'COLLECTOREOF\'',
90101
'extensions:',
91102
' sigv4auth:',
@@ -157,6 +168,11 @@ export class DemoWorkload extends Construct {
157168
' logging: *logging',
158169
'MANAGEDEOF',
159170
'',
171+
'# Kafka\'s healthcheck can exceed compose\'s dependency grace window on',
172+
'# first boot, leaving kafka-dependent services in Created state. Retry',
173+
'# once — second pass finds kafka healthy and starts the stragglers.',
174+
'docker compose -f docker-compose.managed.yml up -d || true',
175+
'sleep 60',
160176
'docker compose -f docker-compose.managed.yml up -d',
161177
].join('\n'),
162178
{ OsiEndpoint: osiEndpoint },

aws/cdk/lib/infra-stack.ts

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import * as cdk from 'aws-cdk-lib';
22
import { Construct } from 'constructs';
33
import { OpenSearchConstruct } from './opensearch';
4+
import { OpenSearchServerlessConstruct } from './opensearch-serverless';
45
import { PrometheusConstruct } from './prometheus';
56

67
/**
 * Props accepted by the infra stack.
 *
 * The `os*` sizing props are only read on the managed-domain path; the
 * serverless path of the stack does not reference them.
 */
export interface InfraStackProps extends cdk.StackProps {
8+
/** OpenSearch flavour to provision; the stack falls back to 'managed' when omitted. */
opensearchType?: 'managed' | 'serverless';
79
/** Instance type for the managed domain; the stack falls back to 'r6g.large.search'. */
osInstanceType?: string;
810
/** Instance count for the managed domain; the stack falls back to 1. */
osInstanceCount?: number;
911
/** Volume size for the managed domain; the stack falls back to 100 (unit not shown here, presumably GiB; confirm in OpenSearchConstruct). */
osVolumeSize?: number;
1012
}
1113

1214
export class InfraStack extends cdk.Stack {
15+
public readonly opensearchType: string;
1316
public readonly domainEndpoint: string;
1417
public readonly domainArn: string;
1518
public readonly masterPasswordSecretArn: string;
@@ -24,21 +27,35 @@ export class InfraStack extends cdk.Stack {
2427

2528
cdk.Tags.of(this).add('observability-stack', this.stackName);
2629

27-
const opensearch = new OpenSearchConstruct(this, 'OpenSearch', {
28-
instanceType: props.osInstanceType ?? 'r6g.large.search',
29-
instanceCount: props.osInstanceCount ?? 1,
30-
volumeSize: props.osVolumeSize ?? 100,
31-
});
30+
this.opensearchType = props.opensearchType ?? 'managed';
31+
32+
if (this.opensearchType === 'serverless') {
33+
const collectionName = `obs-${this.stackName}`.toLowerCase().replace(/[^a-z0-9-]/g, '-').slice(0, 32);
34+
const serverless = new OpenSearchServerlessConstruct(this, 'OpenSearchServerless', {
35+
collectionName,
36+
});
37+
38+
this.domainEndpoint = serverless.collectionEndpoint;
39+
this.domainArn = serverless.collectionArn;
40+
this.masterPasswordSecretArn = '';
41+
this.pipelineRoleArn = serverless.pipelineRole.roleArn;
42+
} else {
43+
const opensearch = new OpenSearchConstruct(this, 'OpenSearch', {
44+
instanceType: props.osInstanceType ?? 'r6g.large.search',
45+
instanceCount: props.osInstanceCount ?? 1,
46+
volumeSize: props.osVolumeSize ?? 100,
47+
});
48+
49+
this.domainEndpoint = opensearch.domain.domainEndpoint;
50+
this.domainArn = opensearch.domain.domainArn;
51+
this.masterPasswordSecretArn = opensearch.masterPasswordSecret.secretArn;
52+
this.pipelineRoleArn = opensearch.pipelineRole.roleArn;
53+
}
3254

3355
const prometheus = new PrometheusConstruct(this, 'Prometheus', {
34-
domainArn: opensearch.domain.domainArn,
56+
domainArn: this.domainArn,
3557
});
3658

37-
// Store for cross-stack refs
38-
this.domainEndpoint = opensearch.domain.domainEndpoint;
39-
this.domainArn = opensearch.domain.domainArn;
40-
this.masterPasswordSecretArn = opensearch.masterPasswordSecret.secretArn;
41-
this.pipelineRoleArn = opensearch.pipelineRole.roleArn;
4259
this.ampWorkspaceArn = prometheus.workspace.attrArn;
4360
this.ampWorkspaceId = prometheus.workspace.attrWorkspaceId;
4461
this.dqsDataSourceArn = prometheus.dataSourceArn;
@@ -48,9 +65,12 @@ export class InfraStack extends cdk.Stack {
4865
const exp = (name: string, value: string) =>
4966
new cdk.CfnOutput(this, name, { value, exportName: `${this.stackName}-${name}` });
5067

68+
exp('OpenSearchType', this.opensearchType);
5169
exp('DomainEndpoint', this.domainEndpoint);
5270
exp('DomainArn', this.domainArn);
53-
exp('MasterPasswordSecretArn', this.masterPasswordSecretArn);
71+
if (this.masterPasswordSecretArn) {
72+
exp('MasterPasswordSecretArn', this.masterPasswordSecretArn);
73+
}
5474
exp('PipelineRoleArn', this.pipelineRoleArn);
5575
exp('AmpWorkspaceArn', this.ampWorkspaceArn);
5676
exp('AmpWorkspaceId', this.ampWorkspaceId);

aws/cdk/lib/ingestion-pipeline.ts

Lines changed: 148 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export interface IngestionPipelineProps {
1010
region: string;
1111
minOcu: number;
1212
maxOcu: number;
13+
serverless?: boolean;
1314
}
1415

1516
export class IngestionPipeline extends Construct {
@@ -22,14 +23,18 @@ export class IngestionPipeline extends Construct {
2223
const stack = cdk.Stack.of(this);
2324
this.pipelineName = `obs-stack-${stack.stackName}`.toLowerCase().replace(/[^a-z0-9-]/g, '-').slice(0, 28);
2425

25-
const opensearchEndpoint = `https://${props.domainEndpoint}`;
26+
const opensearchEndpoint = props.domainEndpoint.startsWith('https://')
27+
? props.domainEndpoint
28+
: `https://${props.domainEndpoint}`;
2629
const prometheusUrl = `https://aps-workspaces.${props.region}.amazonaws.com/workspaces/${props.prometheusWorkspaceId}/api/v1/remote_write`;
2730

31+
const renderFn = props.serverless ? renderServerlessPipelineYaml : renderManagedPipelineYaml;
32+
2833
this.pipeline = new osis.CfnPipeline(this, 'Pipeline', {
2934
pipelineName: this.pipelineName,
3035
minUnits: props.minOcu,
3136
maxUnits: props.maxOcu,
32-
pipelineConfigurationBody: renderPipelineYaml({
37+
pipelineConfigurationBody: renderFn({
3338
pipelineName: this.pipelineName,
3439
opensearchEndpoint,
3540
prometheusUrl,
@@ -41,13 +46,17 @@ export class IngestionPipeline extends Construct {
4146
}
4247
}
4348

44-
function renderPipelineYaml(cfg: {
49+
interface PipelineConfig {
4550
pipelineName: string;
4651
opensearchEndpoint: string;
4752
prometheusUrl: string;
4853
region: string;
4954
roleArn: string;
50-
}): string {
55+
}
56+
57+
// ── Managed (AOS) pipeline ──────────────────────────────────────────────────
58+
59+
function renderManagedPipelineYaml(cfg: PipelineConfig): string {
5160
return `\
5261
version: '2'
5362
otlp-pipeline:
@@ -164,3 +173,138 @@ otel-metrics-pipeline:
164173
region: '${cfg.region}'
165174
`;
166175
}
176+
177+
// ── Serverless (AOSS) pipeline ──────────────────────────────────────────────
178+
179+
/**
 * Index template used for the `logs-otel-v1` sink, serialized to a JSON string
 * so it can be embedded as `template_content` in the serverless pipeline YAML.
 *
 * The string is interpolated inside a single-quoted YAML scalar, so nothing in
 * this object may introduce a single quote into the serialized output.
 * `JSON.stringify` emits keys in literal order, so the order below is the
 * order in the rendered template.
 */
const LOGS_TEMPLATE = JSON.stringify({
  version: 1,
  template: {
    mappings: {
      date_detection: false,
      _source: { enabled: true },
      // One rule per (attribute scope, detected type): resource, instrumentation scope, record.
      dynamic_templates: [
        { long_resource_attributes: { mapping: { type: 'long' }, path_match: 'resource.attributes.*', match_mapping_type: 'long' } },
        { double_resource_attributes: { mapping: { type: 'double' }, path_match: 'resource.attributes.*', match_mapping_type: 'double' } },
        { string_resource_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'resource.attributes.*', match_mapping_type: 'string' } },
        { long_scope_attributes: { mapping: { type: 'long' }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'long' } },
        { double_scope_attributes: { mapping: { type: 'double' }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'double' } },
        { string_scope_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'string' } },
        { long_attributes: { mapping: { type: 'long' }, path_match: 'attributes.*', match_mapping_type: 'long' } },
        { double_attributes: { mapping: { type: 'double' }, path_match: 'attributes.*', match_mapping_type: 'double' } },
        { string_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'attributes.*', match_mapping_type: 'string' } },
      ],
      properties: {
        droppedAttributesCount: { type: 'integer' },
        instrumentationScope: {
          properties: {
            droppedAttributesCount: { type: 'integer' },
            schemaUrl: { type: 'keyword', ignore_above: 256 },
            name: { type: 'keyword', ignore_above: 128 },
            version: { type: 'keyword', ignore_above: 64 },
          },
        },
        resource: {
          properties: {
            droppedAttributesCount: { type: 'integer' },
            schemaUrl: { type: 'keyword', ignore_above: 256 },
          },
        },
        severity: {
          properties: {
            number: { type: 'integer' },
            text: { type: 'keyword', ignore_above: 32 },
          },
        },
        body: { type: 'text' },
        '@timestamp': { type: 'date_nanos' },
        time: { type: 'date_nanos' },
        observedTime: { type: 'date_nanos' },
        traceId: { type: 'keyword', ignore_above: 32 },
        spanId: { type: 'keyword', ignore_above: 16 },
        flags: { type: 'long' },
      },
    },
  },
});
180+
181+
/**
 * Index template used for the `otel-v1-apm-span` sink, serialized to a JSON
 * string so it can be embedded as `template_content` in the serverless
 * pipeline YAML.
 *
 * The `traceGroup` mapping is written out in full here: a line break had
 * landed inside its `"type"` key, leaving an unterminated string literal.
 *
 * The string is interpolated inside a single-quoted YAML scalar, so nothing in
 * this object may introduce a single quote into the serialized output.
 * `JSON.stringify` emits keys in literal order, so the order below is the
 * order in the rendered template.
 */
const TRACES_TEMPLATE = JSON.stringify({
  version: 1,
  template: {
    mappings: {
      date_detection: false,
      _source: { enabled: true },
      // One rule per (attribute scope, detected type): resource, instrumentation scope, span.
      dynamic_templates: [
        { long_resource_attributes: { mapping: { type: 'long' }, path_match: 'resource.attributes.*', match_mapping_type: 'long' } },
        { double_resource_attributes: { mapping: { type: 'double' }, path_match: 'resource.attributes.*', match_mapping_type: 'double' } },
        { string_resource_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'resource.attributes.*', match_mapping_type: 'string' } },
        { long_scope_attributes: { mapping: { type: 'long' }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'long' } },
        { double_scope_attributes: { mapping: { type: 'double' }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'double' } },
        { string_scope_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'instrumentationScope.attributes.*', match_mapping_type: 'string' } },
        { long_attributes: { mapping: { type: 'long' }, path_match: 'attributes.*', match_mapping_type: 'long' } },
        { double_attributes: { mapping: { type: 'double' }, path_match: 'attributes.*', match_mapping_type: 'double' } },
        { string_attributes: { mapping: { type: 'keyword', ignore_above: 256 }, path_match: 'attributes.*', match_mapping_type: 'string' } },
      ],
      properties: {
        droppedAttributesCount: { type: 'integer' },
        instrumentationScope: {
          properties: {
            droppedAttributesCount: { type: 'integer' },
            schemaUrl: { type: 'keyword', ignore_above: 256 },
            name: { type: 'keyword', ignore_above: 128 },
            version: { type: 'keyword', ignore_above: 64 },
          },
        },
        resource: {
          properties: {
            droppedAttributesCount: { type: 'integer' },
            schemaUrl: { type: 'keyword', ignore_above: 256 },
          },
        },
        traceId: { type: 'keyword', ignore_above: 32 },
        spanId: { type: 'keyword', ignore_above: 16 },
        parentSpanId: { type: 'keyword', ignore_above: 16 },
        name: { ignore_above: 1024, type: 'keyword' },
        traceState: { ignore_above: 1024, type: 'keyword' },
        traceGroup: { ignore_above: 1024, type: 'keyword' },
        traceGroupFields: {
          properties: {
            endTime: { type: 'date_nanos' },
            durationInNanos: { type: 'long' },
            statusCode: { type: 'integer' },
          },
        },
        kind: { type: 'keyword', ignore_above: 32 },
        serviceName: { type: 'keyword', ignore_above: 256 },
        startTime: { type: 'date_nanos' },
        endTime: { type: 'date_nanos' },
        '@timestamp': { type: 'date_nanos' },
        time: { type: 'date_nanos' },
        status: {
          properties: {
            code: { type: 'integer' },
            message: { type: 'keyword', ignore_above: 2048 },
          },
        },
        durationInNanos: { type: 'long' },
        events: {
          type: 'nested',
          properties: {
            name: { type: 'keyword', ignore_above: 256 },
            attributes: { type: 'object' },
            droppedAttributesCount: { type: 'integer' },
            time: { type: 'date_nanos' },
          },
        },
        droppedEventsCount: { type: 'integer' },
        links: {
          type: 'nested',
          properties: {
            traceId: { type: 'keyword', ignore_above: 32 },
            spanId: { type: 'keyword', ignore_above: 16 },
            traceState: { ignore_above: 1024, type: 'keyword' },
            attributes: { type: 'object' },
            droppedAttributesCount: { type: 'integer' },
          },
        },
        droppedLinksCount: { type: 'integer' },
      },
    },
  },
});
182+
183+
/**
 * Index template used for the `otel-v2-apm-service-map` sink, serialized to a
 * JSON string so it can be embedded as `template_content` in the serverless
 * pipeline YAML.
 *
 * The string is interpolated inside a single-quoted YAML scalar, so nothing in
 * this object may introduce a single quote into the serialized output.
 * `JSON.stringify` emits keys in literal order, so the order below is the
 * order in the rendered template.
 */
const SERVICE_MAP_TEMPLATE = JSON.stringify({
  version: 0,
  template: {
    mappings: {
      date_detection: false,
      // One rule per (attribute group, detected type): group-by attributes, operation attributes.
      dynamic_templates: [
        { long_group_by_attributes: { path_match: '*.groupByAttributes.*', match_mapping_type: 'long', mapping: { type: 'long' } } },
        { double_group_by_attributes: { path_match: '*.groupByAttributes.*', match_mapping_type: 'double', mapping: { type: 'double' } } },
        { string_group_by_attributes: { path_match: '*.groupByAttributes.*', match_mapping_type: 'string', mapping: { type: 'keyword' } } },
        { long_operation_attributes: { path_match: '*.attributes.*', match_mapping_type: 'long', mapping: { type: 'long' } } },
        { double_operation_attributes: { path_match: '*.attributes.*', match_mapping_type: 'double', mapping: { type: 'double' } } },
        { string_operation_attributes: { path_match: '*.attributes.*', match_mapping_type: 'string', mapping: { type: 'keyword' } } },
      ],
      _source: { enabled: true },
      properties: {
        sourceNode: {
          properties: {
            type: { type: 'keyword' },
            keyAttributes: {
              properties: {
                environment: { type: 'keyword' },
                name: { type: 'keyword' },
              },
            },
            groupByAttributes: { type: 'object', dynamic: 'true' },
          },
        },
        targetNode: {
          properties: {
            type: { type: 'keyword' },
            keyAttributes: {
              properties: {
                environment: { type: 'keyword' },
                name: { type: 'keyword' },
              },
            },
            groupByAttributes: { type: 'object', dynamic: 'true' },
          },
        },
        sourceOperation: {
          properties: {
            name: { type: 'keyword' },
            attributes: { type: 'object', dynamic: 'true' },
          },
        },
        targetOperation: {
          properties: {
            name: { type: 'keyword' },
            attributes: { type: 'object', dynamic: 'true' },
          },
        },
        nodeConnectionHash: { type: 'keyword' },
        operationConnectionHash: { type: 'keyword' },
        timestamp: { type: 'date', format: 'strict_date_optional_time||epoch_millis' },
      },
    },
  },
});
184+
185+
/**
 * Renders the pipeline configuration body used when the `serverless` prop is
 * set (the managed variant is rendered by `renderManagedPipelineYaml`).
 *
 * The returned YAML declares these sub-pipelines:
 * - `otlp-pipeline`: OTLP source with per-pipeline logs/traces/metrics paths;
 *   routes events by `getEventType()` to the three pipelines below.
 * - `otel-logs-pipeline`: copies `time` to `@timestamp`, then writes to the
 *   `logs-otel-v1` index using `LOGS_TEMPLATE`.
 * - `otel-traces-pipeline`: fans out to `traces-raw-pipeline` (`otel_traces`
 *   processor, index `otel-v1-apm-span`, `TRACES_TEMPLATE`) and
 *   `service-map-pipeline` (`otel_apm_service_map` processor, index
 *   `otel-v2-apm-service-map`, `SERVICE_MAP_TEMPLATE`, plus a prometheus sink
 *   for the `service_processed_metrics` route).
 * - `otel-metrics-pipeline`: `otel_metrics` processor into a prometheus sink.
 *
 * Every opensearch sink sets `aws.serverless: true` and passes `cfg.roleArn`
 * as `sts_role_arn`; the prometheus sinks set only `aws.region`.
 *
 * The `*_TEMPLATE` strings are interpolated inside single-quoted YAML scalars,
 * so they must not contain a single quote.
 *
 * @param cfg Pipeline name, OpenSearch endpoint, Prometheus remote-write URL,
 *   region and role ARN substituted into the YAML.
 * @returns The pipeline configuration as a YAML string.
 */
function renderServerlessPipelineYaml(cfg: PipelineConfig): string {
186+
return `\
187+
version: '2'
188+
otlp-pipeline:
189+
source:
190+
otlp:
191+
logs_path: '/${cfg.pipelineName}/v1/logs'
192+
traces_path: '/${cfg.pipelineName}/v1/traces'
193+
metrics_path: '/${cfg.pipelineName}/v1/metrics'
194+
route:
195+
- logs: 'getEventType() == "LOG"'
196+
- traces: 'getEventType() == "TRACE"'
197+
- metrics: 'getEventType() == "METRIC"'
198+
processor: []
199+
sink:
200+
- pipeline:
201+
name: otel-logs-pipeline
202+
routes:
203+
- logs
204+
- pipeline:
205+
name: otel-traces-pipeline
206+
routes:
207+
- traces
208+
- pipeline:
209+
name: otel-metrics-pipeline
210+
routes:
211+
- metrics
212+
213+
otel-logs-pipeline:
214+
source:
215+
pipeline:
216+
name: otlp-pipeline
217+
processor:
218+
- copy_values:
219+
entries:
220+
- from_key: "time"
221+
to_key: "@timestamp"
222+
sink:
223+
- opensearch:
224+
hosts:
225+
- '${cfg.opensearchEndpoint}'
226+
index: 'logs-otel-v1'
227+
template_type: 'index-template'
228+
template_content: '${LOGS_TEMPLATE}'
229+
aws:
230+
serverless: true
231+
region: '${cfg.region}'
232+
sts_role_arn: "${cfg.roleArn}"
233+
234+
otel-traces-pipeline:
235+
source:
236+
pipeline:
237+
name: otlp-pipeline
238+
processor: []
239+
sink:
240+
- pipeline:
241+
name: traces-raw-pipeline
242+
routes: []
243+
- pipeline:
244+
name: service-map-pipeline
245+
routes: []
246+
247+
traces-raw-pipeline:
248+
source:
249+
pipeline:
250+
name: otel-traces-pipeline
251+
processor:
252+
- otel_traces:
253+
sink:
254+
- opensearch:
255+
hosts:
256+
- '${cfg.opensearchEndpoint}'
257+
index: 'otel-v1-apm-span'
258+
template_type: 'index-template'
259+
template_content: '${TRACES_TEMPLATE}'
260+
aws:
261+
serverless: true
262+
region: '${cfg.region}'
263+
sts_role_arn: "${cfg.roleArn}"
264+
265+
service-map-pipeline:
266+
source:
267+
pipeline:
268+
name: otel-traces-pipeline
269+
processor:
270+
- otel_apm_service_map:
271+
db_path: /tmp/otel-apm-service-map
272+
group_by_attributes:
273+
- telemetry.sdk.language
274+
window_duration: 30s
275+
route:
276+
- otel_apm_service_map_route: 'getEventType() == "SERVICE_MAP"'
277+
- service_processed_metrics: 'getEventType() == "METRIC"'
278+
sink:
279+
- opensearch:
280+
hosts:
281+
- '${cfg.opensearchEndpoint}'
282+
aws:
283+
serverless: true
284+
region: '${cfg.region}'
285+
sts_role_arn: "${cfg.roleArn}"
286+
routes:
287+
- otel_apm_service_map_route
288+
index: 'otel-v2-apm-service-map'
289+
template_type: 'index-template'
290+
template_content: '${SERVICE_MAP_TEMPLATE}'
291+
- prometheus:
292+
url: '${cfg.prometheusUrl}'
293+
aws:
294+
region: '${cfg.region}'
295+
routes:
296+
- service_processed_metrics
297+
298+
otel-metrics-pipeline:
299+
source:
300+
pipeline:
301+
name: otlp-pipeline
302+
processor:
303+
- otel_metrics:
304+
sink:
305+
- prometheus:
306+
url: '${cfg.prometheusUrl}'
307+
aws:
308+
region: '${cfg.region}'
309+
`;
310+
}

0 commit comments

Comments
 (0)