Skip to content

Commit ae1dbff

Browse files
author
Yuriy Bezsonov
committed
Merge PR #720: Add bedrock-based thread dump analysis features
- Resolved import conflicts in UnicornStoreStack.java - Moved EKS role creation from InfrastructureContainers to InfrastructureEks - Added ECS permissions to EKS pod role for container monitoring - Added StorageClass gp3 and RBAC setup for otel-collector in eks.sh - Integrated thread dump analysis with Bedrock AI capabilities
2 parents 768ec55 + 1062026 commit ae1dbff

34 files changed

Lines changed: 4425 additions & 300 deletions

apps/unicorn-store-spring/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@
5858
<groupId>org.springframework.boot</groupId>
5959
<artifactId>spring-boot-starter-actuator</artifactId>
6060
</dependency>
61+
<!-- https://mvnrepository.com/artifact/io.micrometer/micrometer-registry-prometheus -->
62+
<dependency>
63+
<groupId>io.micrometer</groupId>
64+
<artifactId>micrometer-registry-prometheus</artifactId>
65+
<version>1.14.7</version>
66+
</dependency>
6167
<!-- AWS SDK dependencies -->
6268
<dependency>
6369
<groupId>software.amazon.awssdk</groupId>
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
package com.unicorn.store.config;
2+
3+
import com.fasterxml.jackson.databind.JsonNode;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import io.micrometer.core.instrument.MeterRegistry;
6+
import io.micrometer.core.instrument.config.MeterFilter;
7+
import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
8+
import org.springframework.context.annotation.Bean;
9+
import org.springframework.context.annotation.Configuration;
10+
11+
import java.io.File;
12+
import java.io.IOException;
13+
import java.net.InetAddress;
14+
import java.net.URI;
15+
import java.net.UnknownHostException;
16+
import java.net.http.HttpClient;
17+
import java.net.http.HttpRequest;
18+
import java.net.http.HttpResponse;
19+
import java.nio.file.Files;
20+
import java.util.Optional;
21+
22+
@Configuration
23+
public class MonitoringConfig {
24+
25+
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
26+
private static final File NAMESPACE_FILE = new File("/var/run/secrets/kubernetes.io/serviceaccount/namespace");
27+
28+
@Bean
29+
public MeterRegistryCustomizer<MeterRegistry> meterRegistryCustomizer() {
30+
return registry -> {
31+
String clusterType = System.getenv("ECS_CONTAINER_METADATA_URI_V4") != null ? "ecs" : "eks";
32+
String cluster = clusterType.equals("ecs") ? extractClusterNameFromMetadata().orElse("unknown") : Optional.ofNullable(System.getenv("CLUSTER")).orElse("unknown");
33+
String containerName = "unicorn-store-spring";
34+
String taskOrPodId = extractTaskOrPodId().orElse("unknown");
35+
String namespace = clusterType.equals("eks") ? readNamespaceFile().orElse("default") : "";
36+
37+
// Get the container/pod IP address
38+
String ipAddress = getContainerOrPodIp().orElse("unknown");
39+
40+
registry.config().commonTags(
41+
"cluster", cluster,
42+
"cluster_type", clusterType,
43+
"container_name", containerName,
44+
"task_pod_id", taskOrPodId,
45+
"instance", ipAddress, // Keep this for backward compatibility
46+
"container_ip", ipAddress // Add this new tag that won't be overwritten
47+
);
48+
49+
if (!namespace.isEmpty()) {
50+
registry.config().commonTags("namespace", namespace);
51+
} else {
52+
registry.config().commonTags("namespace", "<no namespace>");
53+
}
54+
55+
registry.config().meterFilter(
56+
MeterFilter.deny(id ->
57+
id.getName().equals("jvm.gc.pause") &&
58+
!id.getTags().stream().allMatch(tag ->
59+
tag.getKey().equals("action") ||
60+
tag.getKey().equals("cause") ||
61+
tag.getKey().equals("gc")
62+
)
63+
)
64+
);
65+
};
66+
}
67+
68+
private Optional<String> extractTaskOrPodId() {
69+
String metadataUri = System.getenv("ECS_CONTAINER_METADATA_URI_V4");
70+
if (metadataUri != null) {
71+
try {
72+
HttpRequest request = HttpRequest.newBuilder()
73+
.uri(URI.create(metadataUri + "/task"))
74+
.build();
75+
76+
HttpResponse<String> response = HttpClient.newHttpClient()
77+
.send(request, HttpResponse.BodyHandlers.ofString());
78+
79+
JsonNode root = OBJECT_MAPPER.readTree(response.body());
80+
String taskArn = root.path("TaskARN").asText();
81+
String[] parts = taskArn.split("/");
82+
return parts.length > 1 ? Optional.of(parts[parts.length - 1]) : Optional.empty();
83+
84+
} catch (IOException | InterruptedException e) {
85+
return Optional.empty();
86+
}
87+
}
88+
89+
// EKS fallback: read pod name from Downward API
90+
return readFile("/etc/podinfo/name");
91+
}
92+
93+
private Optional<String> extractClusterNameFromMetadata() {
94+
String metadataUri = System.getenv("ECS_CONTAINER_METADATA_URI_V4");
95+
if (metadataUri != null) {
96+
try {
97+
HttpRequest request = HttpRequest.newBuilder()
98+
.uri(URI.create(metadataUri + "/task"))
99+
.build();
100+
101+
HttpResponse<String> response = HttpClient.newHttpClient()
102+
.send(request, HttpResponse.BodyHandlers.ofString());
103+
104+
JsonNode root = OBJECT_MAPPER.readTree(response.body());
105+
String clusterArn = root.path("Cluster").asText();
106+
String[] parts = clusterArn.split("/");
107+
return parts.length > 1 ? Optional.of(parts[parts.length - 1]) : Optional.empty();
108+
109+
} catch (IOException | InterruptedException e) {
110+
return Optional.empty();
111+
}
112+
}
113+
return Optional.empty();
114+
}
115+
116+
private Optional<String> readNamespaceFile() {
117+
return readFile(NAMESPACE_FILE.getAbsolutePath());
118+
}
119+
120+
private Optional<String> readFile(String path) {
121+
try {
122+
return Optional.of(Files.readString(new File(path).toPath()).trim());
123+
} catch (IOException e) {
124+
return Optional.empty();
125+
}
126+
}
127+
128+
// New method to get the container or pod IP address
129+
private Optional<String> getContainerOrPodIp() {
130+
// For ECS
131+
String metadataUri = System.getenv("ECS_CONTAINER_METADATA_URI_V4");
132+
if (metadataUri != null) {
133+
try {
134+
HttpRequest request = HttpRequest.newBuilder()
135+
.uri(URI.create(metadataUri))
136+
.build();
137+
138+
HttpResponse<String> response = HttpClient.newHttpClient()
139+
.send(request, HttpResponse.BodyHandlers.ofString());
140+
141+
JsonNode root = OBJECT_MAPPER.readTree(response.body());
142+
143+
if (root.has("Networks") && root.path("Networks").isArray() && !root.path("Networks").isEmpty()) {
144+
JsonNode network = root.path("Networks").get(0);
145+
if (network.has("IPv4Addresses") && network.path("IPv4Addresses").isArray() &&
146+
!network.path("IPv4Addresses").isEmpty()) {
147+
return Optional.of(network.path("IPv4Addresses").get(0).asText());
148+
}
149+
}
150+
} catch (IOException | InterruptedException e) {
151+
// Fall through to next method
152+
}
153+
}
154+
155+
// For Kubernetes/EKS
156+
String podIp = System.getenv("KUBERNETES_POD_IP");
157+
if (podIp != null && !podIp.isEmpty()) {
158+
return Optional.of(podIp);
159+
}
160+
161+
// Try to get local IP as fallback
162+
try {
163+
return Optional.of(InetAddress.getLocalHost().getHostAddress());
164+
} catch (UnknownHostException e) {
165+
return Optional.empty();
166+
}
167+
}
168+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// src/main/java/com/unicorn/store/controller/ThreadManagementController.java
2+
package com.unicorn.store.controller;
3+
4+
import com.unicorn.store.service.ThreadGeneratorService;
5+
import org.springframework.http.ResponseEntity;
6+
import org.springframework.web.bind.annotation.*;
7+
8+
@RestController
9+
@RequestMapping("/api/threads")
10+
public class ThreadManagementController {
11+
12+
private final ThreadGeneratorService threadGeneratorService;
13+
14+
public ThreadManagementController(ThreadGeneratorService threadGeneratorService) {
15+
this.threadGeneratorService = threadGeneratorService;
16+
}
17+
18+
@PostMapping("/start")
19+
public ResponseEntity<String> startThreads(@RequestParam(defaultValue = "500") int count) {
20+
try {
21+
threadGeneratorService.startThreads(count);
22+
return ResponseEntity.ok("Successfully started " + count + " threads");
23+
} catch (IllegalStateException e) {
24+
return ResponseEntity.badRequest().body(e.getMessage());
25+
}
26+
}
27+
28+
@PostMapping("/stop")
29+
public ResponseEntity<String> stopThreads() {
30+
try {
31+
threadGeneratorService.stopThreads();
32+
return ResponseEntity.ok("Successfully stopped all threads");
33+
} catch (IllegalStateException e) {
34+
return ResponseEntity.badRequest().body(e.getMessage());
35+
}
36+
}
37+
38+
@GetMapping("/count")
39+
public ResponseEntity<Integer> getThreadCount() {
40+
return ResponseEntity.ok(threadGeneratorService.getActiveThreadCount());
41+
}
42+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// src/main/java/com/unicorn/store/monitoring/ThreadMonitoringMBean.java
2+
package com.unicorn.store.monitoring;
3+
4+
import com.unicorn.store.service.ThreadGeneratorService;
5+
import org.springframework.jmx.export.annotation.ManagedAttribute;
6+
import org.springframework.jmx.export.annotation.ManagedResource;
7+
import org.springframework.stereotype.Component;
8+
9+
@Component
10+
@ManagedResource(objectName = "com.unicorn.store:type=ThreadMonitoring,name=ThreadStats")
11+
public class ThreadMonitoringMBean {
12+
13+
private final ThreadGeneratorService threadGeneratorService;
14+
15+
public ThreadMonitoringMBean(ThreadGeneratorService threadGeneratorService) {
16+
this.threadGeneratorService = threadGeneratorService;
17+
}
18+
19+
@ManagedAttribute(description = "Number of active custom threads")
20+
public int getActiveThreadCount() {
21+
return threadGeneratorService.getActiveThreadCount();
22+
}
23+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// src/main/java/com/unicorn/store/service/ThreadGeneratorService.java
2+
package com.unicorn.store.service;
3+
4+
import org.slf4j.Logger;
5+
import org.slf4j.LoggerFactory;
6+
import org.springframework.stereotype.Service;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import java.util.concurrent.atomic.AtomicBoolean;
11+
12+
@Service
13+
public class ThreadGeneratorService {
14+
private static final Logger logger = LoggerFactory.getLogger(ThreadGeneratorService.class);
15+
private final List<Thread> activeThreads = new ArrayList<>();
16+
private final AtomicBoolean running = new AtomicBoolean(false);
17+
18+
public synchronized void startThreads(int threadCount) {
19+
if (running.get()) {
20+
throw new IllegalStateException("Threads are already running");
21+
}
22+
23+
running.set(true);
24+
logger.info("Starting {} threads", threadCount);
25+
26+
for (int i = 0; i < threadCount; i++) {
27+
Thread thread = new Thread(new DummyWorkload(running));
28+
thread.setName("DummyThread-" + i);
29+
activeThreads.add(thread);
30+
thread.start();
31+
}
32+
33+
logger.info("Started {} threads", threadCount);
34+
}
35+
36+
public synchronized void stopThreads() {
37+
if (!running.get()) {
38+
throw new IllegalStateException("No threads are running");
39+
}
40+
41+
logger.info("Stopping {} threads", activeThreads.size());
42+
running.set(false);
43+
44+
// Wait for all threads to complete
45+
activeThreads.forEach(thread -> {
46+
try {
47+
thread.join(5000); // Wait up to 5 seconds for each thread
48+
} catch (InterruptedException e) {
49+
logger.warn("Interrupted while waiting for thread {} to stop", thread.getName());
50+
}
51+
});
52+
53+
activeThreads.clear();
54+
logger.info("All threads stopped");
55+
}
56+
57+
public int getActiveThreadCount() {
58+
return activeThreads.size();
59+
}
60+
61+
private static class DummyWorkload implements Runnable {
62+
private final AtomicBoolean running;
63+
64+
public DummyWorkload(AtomicBoolean running) {
65+
this.running = running;
66+
}
67+
68+
@Override
69+
public void run() {
70+
while (running.get()) {
71+
// Simulate some work
72+
try {
73+
// Calculate some dummy values to keep CPU busy
74+
double result = 0;
75+
for (int i = 0; i < 1000; i++) {
76+
result += Math.sqrt(i) * Math.random();
77+
}
78+
Thread.sleep(100); // Sleep to prevent excessive CPU usage
79+
} catch (InterruptedException e) {
80+
Thread.currentThread().interrupt();
81+
break;
82+
}
83+
}
84+
}
85+
}
86+
}
Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,30 @@
11
spring.sql.init.mode=always
2-
# spring.datasource.url=jdbc:postgresql://localhost:5432/unicorns
32
spring.datasource.username=postgres
4-
# spring.datasource.password=postgres
53
spring.jpa.database-platform=org.hibernate.dialect.PostgreSQLDialect
64
server.error.include-message=always
75
spring.jpa.open-in-view=false
8-
spring.datasource.hikari.maximumPoolSize=1
9-
spring.datasource.hikari.data-source-properties.preparedStatementCacheQueries=0
10-
management.endpoint.health.probes.enabled=true
116

12-
# Disable Hibernate usage of JDBC metadata
137
spring.jpa.properties.hibernate.temp.use_jdbc_metadata_defaults=false
14-
15-
# Database initialization will typically be performed outside of Spring lifecycle
168
spring.jpa.hibernate.ddl-auto=none
9+
10+
spring.datasource.hikari.initialization-fail-timeout=0
11+
spring.datasource.hikari.maximumPoolSize=1
1712
spring.datasource.hikari.allow-pool-suspension=true
13+
spring.datasource.hikari.data-source-properties.preparedStatementCacheQueries=0
14+
15+
# Virtual Threads
16+
spring.threads.virtual.enabled=true
17+
18+
# Actuator config
19+
spring.jmx-enabled=true
20+
management.endpoints.web.exposure.include=threaddump,prometheus,health,info
21+
management.endpoint.health.probes.enabled=true
22+
management.endpoint.prometheus.enabled=true
23+
management.endpoint.health.group.liveness.include=livenessState
24+
management.endpoint.health.group.readiness.include=readinessState
25+
26+
management.metrics.enable.all=true
27+
management.metrics.tags.application=otel-jmx-unicorn-store
1828

19-
# Enable virtual threads
20-
spring.threads.virtual.enabled=true
29+
server.port=8080
30+
server.address=0.0.0.0

infrastructure/cdk/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,5 +75,10 @@
7575
<version>${junit.version}</version>
7676
<scope>test</scope>
7777
</dependency>
78+
<dependency>
79+
<groupId>software.constructs</groupId>
80+
<artifactId>constructs</artifactId>
81+
<version>10.3.0</version>
82+
</dependency>
7883
</dependencies>
7984
</project>

0 commit comments

Comments
 (0)