Skip to content

Commit a846521

Browse files
committed
health tools
1 parent 654472d commit a846521

10 files changed

Lines changed: 790 additions & 80 deletions

File tree

models/tools/AsyncTools.bx

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,63 @@ class extends="BaseTool"{
2525
return application.cbController.getAsyncManager().getExecutors().keyArray().sort()
2626
}
2727

28+
/**
 * Assess the health of every registered async executor.
 *
 * Reads the executor status map from the ColdBox AsyncManager, tallies executors by their
 * reported `healthStatus`, and records an issue for each executor that is not healthy or idle.
 * An executor that reports no `healthStatus` is treated as "healthy".
 *
 * Status indicators:
 * - healthy  : All executors are healthy or idle
 * - warning  : One or more executors are degraded
 * - critical : One or more executors are shutdown or terminated
 *
 * @return The result of _buildHealthReport() for the collected issues and the summary counters
 */
@mcpTool
@AITool
function async_get_health() {
	var executors  = application.cbController.getAsyncManager().getExecutorStatusMap()
	var report     = _newHealthReport()
	var total      = executors.len()
	var healthy    = 0
	var degraded   = 0
	var critical   = 0
	var idle       = 0
	var shutdown   = 0
	var terminated = 0
	var details    = []

	executors.each( ( executorName, info ) => {
		var state = info.healthStatus ?: "healthy"
		details.append( { "name" : executorName, "status" : state, "type" : info.type ?: "" } )

		// The switch only updates counters and picks the issue descriptor;
		// the issue itself is raised once, below.
		var severity = ""
		var code     = ""
		var label    = ""
		switch ( state ) {
			case "healthy":
				healthy++
				break
			case "idle":
				idle++
				break
			case "degraded":
				degraded++
				severity = "warning"
				code     = "EXECUTOR_DEGRADED"
				label    = "degraded"
				break
			case "shutdown":
				shutdown++
				critical++
				severity = "critical"
				code     = "EXECUTOR_SHUTDOWN"
				label    = "shutdown"
				break
			case "terminated":
				terminated++
				critical++
				severity = "critical"
				code     = "EXECUTOR_TERMINATED"
				label    = "terminated"
				break
		}

		if ( len( severity ) ) {
			_addIssue( report, severity, code, "Executor '#executorName#' is #label#", "Review executor stats using async_get_all()", executorName, { "status" : state } )
		}
	} )

	return _buildHealthReport( report, {
		"total" : total,
		"healthy" : healthy,
		"degraded" : degraded,
		"critical" : critical,
		"idle" : idle,
		"shutdown" : shutdown,
		"terminated" : terminated,
		"details" : details
	} )
}
86+
2887
}

models/tools/CacheBoxTools.bx

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,83 @@ class extends="BaseTool"{
128128
return application.cbController.getCacheBox().getCache( arguments.cacheName ).getStoreMetadataReport()
129129
}
130130

131+
/**
 * Assess the health of every CacheBox cache provider.
 *
 * For each registered cache this collects size, hits, misses, evictions and the enabled flag,
 * derives an integer hit-rate percentage, and records warnings for caches that are disabled,
 * have a low hit rate, or have at least as many evictions as stored items.
 *
 * Status indicators:
 * - healthy  : All caches enabled and hit rates are normal
 * - warning  : One or more caches are disabled, thrashing, or have low hit rates
 * - critical : Not used for cache health in this report
 *
 * @return The result of _buildHealthReport() for the collected issues, totals and per-cache details
 */
@mcpTool
@AITool
function cachebox_get_health() {
	var cacheBox   = application.cbController.getCacheBox()
	var report     = _newHealthReport()
	var cacheNames = cacheBox.getCacheNames()
	var totals     = { "items" : 0, "hits" : 0, "misses" : 0, "evictions" : 0 }
	var details    = []

	// Whole-number percentage of `part` within `whole`; 0 when there is nothing to divide by
	var toPercent = ( part, whole ) => whole > 0 ? int( ( part / whole ) * 100 ) : 0

	cacheNames.each( ( cacheName ) => {
		var provider  = cacheBox.getCache( cacheName )
		var stats     = provider.getStats()
		var size      = provider.getSize()
		var hits      = stats.getHits()
		var misses    = stats.getMisses()
		var evictions = stats.getEvictionCount()
		var enabled   = provider.isEnabled()
		var requests  = hits + misses
		var hitRate   = toPercent( hits, requests )

		totals.items     += size
		totals.hits      += hits
		totals.misses    += misses
		totals.evictions += evictions

		details.append( {
			"name" : cacheName,
			"provider" : getMetadata( provider ).name ?: "",
			"size" : size,
			"hits" : hits,
			"misses" : misses,
			"hitRate" : hitRate,
			"evictions" : evictions,
			"enabled" : enabled
		} )

		if ( !enabled ) {
			_addIssue( report, "warning", "CACHEBOX_DISABLED", "Cache '#cacheName#' is disabled", "Enable the cache if caching is expected for this region", cacheName, { "enabled" : false } )
		} else {
			// Hit rate is only judged once at least 10 requests have been observed
			if ( hitRate < 30 && requests >= 10 ) {
				_addIssue( report, "warning", "CACHEBOX_LOW_HIT_RATE", "Cache '#cacheName#' has a low hit rate of #hitRate#% (#hits# hits, #misses# misses)", "Review cache keys and TTL strategy", cacheName, { "hitRate" : hitRate, "hits" : hits, "misses" : misses } )
			}

			// Evictions matching or exceeding the current item count are reported as high
			if ( evictions > 0 && evictions >= size ) {
				_addIssue( report, "warning", "CACHEBOX_HIGH_EVICTIONS", "Cache '#cacheName#' has #evictions# evictions with only #size# items", "Increase cache capacity or review TTL settings", cacheName, { "evictions" : evictions, "size" : size } )
			}
		}
	} )

	return _buildHealthReport( report, {
		"total" : cacheNames.len(),
		"totalSize" : totals.items,
		"totalHits" : totals.hits,
		"totalMisses" : totals.misses,
		"totalEvictions" : totals.evictions,
		"overallHitRate" : toPercent( totals.hits, totals.hits + totals.misses ),
		"details" : details
	} )
}
207+
131208
/**
132209
* Clear ALL elements from a specific cache provider.
133210
*

models/tools/LogBoxTools.bx

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,73 @@ class extends="BaseTool"{
277277
return results
278278
}
279279

280+
/**
 * Get a health assessment of the LogBox logging infrastructure.
 *
 * Returns a structured health report with status, score, and logger/appender issues.
 * For every file appender whose log file exists, the last 100 lines are scanned and
 * rows whose leading severity field is ERROR or FATAL are counted as recent errors.
 *
 * Status indicators:
 * - healthy  : All appenders are initialized and logger coverage looks normal
 * - warning  : One or more appenders are not initialized or a logger has no appenders
 * - critical : Not used for LogBox health in this report
 *
 * @return The result of _buildHealthReport() for the collected issues and the logger/appender counters
 */
@mcpTool
@AITool
function logbox_get_health() {
	// Number of trailing lines inspected in each log file
	var tailSize = 100
	// FileAppender rows start with the quoted severity ("SEVERITY","AppenderName",...).
	// Anchoring to the start of the line keeps messages or continuation lines that merely
	// contain the text "ERROR"/"FATAL" from being counted as error entries.
	var errorRowPattern = '^"(ERROR|FATAL)"'

	var logBox           = application.cbController.getLogBox()
	var report           = _newHealthReport()
	var loggerRegistry   = logBox.getLoggerRegistry()
	var appenderRegistry = logBox.getAppenderRegistry()
	var totalLoggers     = loggerRegistry.len()
	var totalAppenders   = appenderRegistry.len()
	var initialized      = 0
	var uninitialized    = 0
	var fileAppenders    = 0
	var recentErrors     = 0

	appenderRegistry.each( ( name, appender ) => {
		if ( appender.isInitialized() ) {
			initialized++
		} else {
			uninitialized++
			_addIssue( report, "warning", "LOGBOX_APPENDER_NOT_INITIALIZED", "Appender '#name#' is not initialized", "Verify appender configuration and target path or destination", name )
		}

		if ( isInstanceOf( appender, "coldbox.system.logging.appenders.FileAppender" ) ) {
			fileAppenders++
			var logFile = appender.getLogFullpath()
			if ( fileExists( logFile ) ) {
				// NOTE(review): the whole file is read to reach its tail; this may be costly for very large logs
				var rawLines = fileRead( logFile ).listToArray( char( 10 ) )
				var startIdx = max( 1, rawLines.len() - tailSize + 1 )
				for ( var i = rawLines.len(); i >= startIdx; i-- ) {
					if ( reFindNoCase( errorRowPattern, trim( rawLines[ i ] ) ) ) {
						recentErrors++
					}
				}
			}
		}
	} )

	loggerRegistry.each( ( category, logger ) => {
		if ( logger.hasAppenders() ) return
		_addIssue( report, "warning", "LOGBOX_LOGGER_NO_APPENDERS", "Logger '#category#' has no appenders", "Attach an appender or verify root logger coverage", category )
	} )

	if ( recentErrors > 0 ) {
		_addIssue( report, "info", "LOGBOX_RECENT_ERRORS", "Detected #recentErrors# recent ERROR/FATAL log entries across file appenders", "Review recent log output for the related appenders or categories" )
	}

	return _buildHealthReport( report, {
		"totalLoggers" : totalLoggers,
		"totalAppenders" : totalAppenders,
		"initializedAppenders" : initialized,
		"uninitializedAppenders" : uninitialized,
		"fileAppenders" : fileAppenders,
		"recentErrors" : recentErrors
	} )
}
346+
280347
/**
281348
* Parse a single CSV log line written by FileAppender into a struct.
282349
* Expected format: "SEVERITY","AppenderName","MM/dd/yyyy","HH:mm:ss","category","message"

models/tools/SchedulerTools.bx

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,69 @@ class extends="BaseTool"{
118118
return "Task '#arguments.taskName#' in scheduler '#arguments.schedulerName#' has been resumed."
119119
}
120120

121+
/**
 * Assess the health of all schedulers and their registered tasks.
 *
 * Walks the map returned by getSchedulerMap(), counts started schedulers and
 * active versus disabled tasks, and records a warning for every scheduler that
 * has not started or whose tasks are all disabled (reported as "paused").
 *
 * Status indicators:
 * - healthy  : All schedulers started and tasks are active
 * - warning  : One or more schedulers are not started or all tasks are paused
 * - critical : Not used for scheduler health in this report
 *
 * @return The result of _buildHealthReport() for the collected issues and the scheduler/task counters
 */
@mcpTool
@AITool
function scheduler_get_health() {
	var report       = _newHealthReport()
	var schedulers   = getSchedulerMap()
	var totalTasks   = 0
	var activeTasks  = 0
	var pausedTasks  = 0
	var startedCount = 0

	schedulers.each( ( mapKey, scheduler ) => {
		var schedName = scheduler.getSchedulerName()

		if ( !scheduler.hasStarted() ) {
			_addIssue( report, "warning", "SCHEDULER_NOT_STARTED", "Scheduler '#schedName#' has not been started", "Start the scheduler during application startup", schedName )
		} else {
			startedCount++
		}

		// Per-scheduler tallies; the function-level counters are updated alongside them
		var knownTasks    = 0
		var disabledTasks = 0
		scheduler.getRegisteredTasks().each( ( taskName ) => {
			var record = scheduler.getTaskRecord( taskName )
			// Skip registrations that have no usable task object
			if ( isNull( record ) || isNull( record.task ) ) return

			knownTasks++
			if ( record.task.isDisabled() ) {
				pausedTasks++
				disabledTasks++
			} else {
				activeTasks++
			}
		} )

		totalTasks += knownTasks

		if ( knownTasks > 0 && disabledTasks == knownTasks ) {
			_addIssue( report, "warning", "SCHEDULER_ALL_TASKS_PAUSED", "All #disabledTasks# task(s) in scheduler '#schedName#' are paused", "Resume tasks using scheduler_resume_task()", schedName )
		}
	} )

	var schedulerCount = schedulers.size()

	return _buildHealthReport( report, {
		"totalSchedulers" : schedulerCount,
		"startedSchedulers" : startedCount,
		"stoppedSchedulers" : schedulerCount - startedCount,
		"totalTasks" : totalTasks,
		"activeTasks" : activeTasks,
		"pausedTasks" : pausedTasks
	} )
}
183+
121184
/*********************************** PRIVATE HELPERS ***********************************/
122185

123186
/**

models/tools/SystemTools.bx

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@
77
*/
88
class extends="BaseTool"{
99

10+
/**
 * Invoke a subsystem health closure, converting any thrown exception into a
 * fallback health report that carries a single warning issue.
 *
 * @caller The closure that returns a subsystem health report
 * @label  The subsystem label used in the fallback issue message and report data
 *
 * @return The closure's own result, or a fallback report built with { "error" : true, "label" : label }
 */
private struct function _safeHealthCall( required any caller, required string label ) {
	try {
		return arguments.caller()
	} catch ( any err ) {
		// Deliberate best effort: one failing subsystem must not break the aggregated report
		var fallback = _newHealthReport()
		_addIssue(
			fallback,
			"warning",
			"SYSTEM_SUBSYSTEM_ERROR",
			"Unable to retrieve health for #arguments.label#: #err.message#",
			"Check runtime logs for errors related to #arguments.label#"
		)
		return _buildHealthReport( fallback, { "error" : true, "label" : arguments.label } )
	}
}
25+
1026
/**
1127
* Get the server's current date/time in ISO 8601 format.
1228
*/
@@ -88,6 +104,68 @@ class extends="BaseTool"{
88104
return result
89105
}
90106

107+
/**
 * Get a consolidated health assessment across all cbMCP subsystems.
 *
 * Aggregates async executors, caches, schedulers, and LogBox into a single structured health report.
 * Each subsystem is queried through _safeHealthCall(), so a failing subsystem contributes a
 * fallback report instead of aborting the aggregation.
 *
 * Overall status is the worst of all subsystems. Overall score is the lowest subsystem score.
 * A subsystem whose score is missing or non-numeric is treated as scoring 100.
 *
 * @return A struct with the keys status, score, issues (each issue code prefixed with
 *         "<subsystem>:"), subsystems (the raw per-subsystem reports) and scoreBreakdown
 */
@mcpTool
@AITool
function system_get_health() {
	var subsystems = {}

	subsystems.async     = _safeHealthCall( () => new AsyncTools().async_get_health(), "async" )
	subsystems.cachebox  = _safeHealthCall( () => new CacheBoxTools().cachebox_get_health(), "cachebox" )
	subsystems.scheduler = _safeHealthCall( () => new SchedulerTools().scheduler_get_health(), "scheduler" )
	subsystems.logbox    = _safeHealthCall( () => new LogBoxTools().logbox_get_health(), "logbox" )

	// Normalize a subsystem score. The default is applied BEFORE the numeric check so a
	// missing `score` key yields 100 instead of being dereferenced afterwards.
	var scoreOf = ( sub ) => {
		if ( !isStruct( sub ) ) return 100
		var raw = sub.score ?: 100
		return isNumeric( raw ) ? raw : 100
	}

	var overallScore  = 100
	var overallStatus = "healthy"
	var allIssues     = []

	subsystems.each( ( key, sub ) => {
		if ( !isStruct( sub ) ) return

		var subScore = scoreOf( sub )
		if ( subScore < overallScore ) overallScore = subScore

		// Status only ever escalates: healthy -> warning -> critical
		var subStatus = sub.status ?: "healthy"
		if ( subStatus == "critical" ) {
			overallStatus = "critical"
		} else if ( subStatus == "warning" && overallStatus != "critical" ) {
			overallStatus = "warning"
		}

		if ( sub.keyExists( "issues" ) && isArray( sub.issues ) ) {
			sub.issues.each( ( issue ) => {
				// Copy before prefixing so the subsystem's own report keeps its original codes
				var prefixed = duplicate( issue )
				prefixed.code = "#key#:#prefixed.code#"
				allIssues.append( prefixed )
			} )
		}
	} )

	return {
		"status" : overallStatus,
		"score" : overallScore,
		"issues" : allIssues,
		"subsystems" : subsystems,
		"scoreBreakdown" : {
			"base" : 100,
			"subsystems" : subsystems.map( ( key, sub ) => {
				return {
					"subsystem" : key,
					"score" : scoreOf( sub ),
					"status" : isStruct( sub ) ? ( sub.status ?: "unknown" ) : "error"
				}
			} )
		}
	}
}
168+
91169
/**
92170
* Get the value of a specific ColdBox application setting by key.
93171
*

0 commit comments

Comments
 (0)