-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpredictive.zod.ts
More file actions
311 lines (266 loc) · 12.5 KB
/
predictive.zod.ts
File metadata and controls
311 lines (266 loc) · 12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license.
import { z } from 'zod';
import { TokenUsageSchema } from './cost.zod';
/**
* Predictive Analytics Protocol
*
* Defines predictive models and machine learning configurations for
* data-driven decision making and forecasting in ObjectStack applications.
*
* Use Cases:
* - Lead scoring and conversion prediction
* - Customer churn prediction
* - Sales forecasting
* - Demand forecasting
* - Anomaly detection in operational data
* - Customer segmentation and clustering
* - Price optimization
* - Recommendation systems
*/
/**
 * Predictive Model Types
 *
 * Closed set of task categories a predictive model can be declared as.
 */
export const PredictiveModelTypeSchema = z.enum([
  // Predicts a label, either two-class or multi-class
  'classification',
  // Predicts a numeric value
  'regression',
  // Groups records without supervision
  'clustering',
  // Predicts values over a time series
  'forecasting',
  // Detects outliers
  'anomaly_detection',
  // Recommends an item or an action
  'recommendation',
  // Orders items by relevance
  'ranking',
]);
// Accepted shape of a feature name: lowercase letters, digits and
// underscores, where the first character is not a digit.
const FEATURE_NAME_PATTERN = /^[a-z_][a-z0-9_]*$/;

/**
 * Model Feature Definition
 *
 * A single input feature of a predictive model: its identity, the field it
 * is read from, its data type, and the transformation applied to it.
 */
export const ModelFeatureSchema = z.object({
  // --- Identity ---
  name: z
    .string()
    .regex(FEATURE_NAME_PATTERN)
    .describe('Feature name (snake_case)'),
  label: z.string().optional().describe('Human-readable label'),

  // --- Data source ---
  field: z.string().describe('Source field name'),
  object: z
    .string()
    .optional()
    .describe('Source object (if different from target)'),

  // --- Feature type ---
  dataType: z
    .enum(['numeric', 'categorical', 'text', 'datetime', 'boolean'])
    .describe('Feature data type'),

  // --- Feature engineering (falls back to 'none' when omitted) ---
  transformation: z
    .enum([
      'none',
      'normalize', // scale into the 0-1 range
      'standardize', // z-score standardization
      'one_hot_encode', // one-hot encoding for categorical values
      'label_encode', // label encoding for categorical values
      'log_transform', // logarithmic transformation
      'binning', // discretize continuous values
      'embedding', // text/categorical embedding
    ])
    .optional()
    .default('none'),

  // --- Configuration (features are required unless stated otherwise) ---
  required: z.boolean().optional().default(true),
  defaultValue: z.unknown().optional(),

  // --- Metadata ---
  description: z.string().optional(),
  importance: z
    .number()
    .optional()
    .describe('Feature importance score (0-1)'),
});
// Parameters shared by most training procedures.
const generalHyperparameters = {
  learningRate: z.number().optional().describe('Learning rate for training'),
  epochs: z.number().int().optional().describe('Number of training epochs'),
  batchSize: z.number().int().optional().describe('Training batch size'),
};

// Parameters for tree-based models (Random Forest, XGBoost, etc.).
const treeHyperparameters = {
  maxDepth: z.number().int().optional().describe('Maximum tree depth'),
  numTrees: z.number().int().optional().describe('Number of trees in ensemble'),
  minSamplesSplit: z
    .number()
    .int()
    .optional()
    .describe('Minimum samples to split node'),
  minSamplesLeaf: z
    .number()
    .int()
    .optional()
    .describe('Minimum samples in leaf node'),
};

// Parameters for neural networks.
const neuralNetworkHyperparameters = {
  hiddenLayers: z
    .array(z.number().int())
    .optional()
    .describe('Hidden layer sizes'),
  activation: z.string().optional().describe('Activation function'),
  dropout: z.number().optional().describe('Dropout rate'),
};

// Regularization strengths.
const regularizationHyperparameters = {
  l1Regularization: z
    .number()
    .optional()
    .describe('L1 regularization strength'),
  l2Regularization: z
    .number()
    .optional()
    .describe('L2 regularization strength'),
};

// Parameters for clustering.
const clusteringHyperparameters = {
  numClusters: z
    .number()
    .int()
    .optional()
    .describe('Number of clusters (k-means, etc.)'),
};

// Parameters for time-series models.
const timeSeriesHyperparameters = {
  seasonalPeriod: z
    .number()
    .int()
    .optional()
    .describe('Seasonal period for time series'),
  forecastHorizon: z
    .number()
    .int()
    .optional()
    .describe('Number of periods to forecast'),
};

/**
 * Model Hyperparameters
 *
 * Algorithm-level configuration. Every key is optional; the shape is
 * assembled from the per-family fragments above, in that order, followed by
 * a free-form `custom` record.
 */
export const HyperparametersSchema = z.object({
  ...generalHyperparameters,
  ...treeHyperparameters,
  ...neuralNetworkHyperparameters,
  ...regularizationHyperparameters,
  ...clusteringHyperparameters,
  ...timeSeriesHyperparameters,
  // String-keyed record for any additional parameters
  custom: z
    .record(z.string(), z.unknown())
    .optional()
    .describe('Algorithm-specific parameters'),
});
/**
 * Model Training Configuration
 *
 * Describes how training data is split and filtered, which training strategy
 * is used, and the early-stopping, resource and reproducibility settings.
 *
 * Cross-field rule: the training, validation and test ratios must add up to
 * 1 within a tolerance of 0.01. A violation is reported as a custom issue on
 * the `trainingDataRatio` path.
 */
export const TrainingConfigSchema = z.object({
  /** Data Split (defaults: 0.8 / 0.1 / 0.1) */
  trainingDataRatio: z.number().min(0).max(1).optional().default(0.8).describe('Proportion of data for training'),
  validationDataRatio: z.number().min(0).max(1).optional().default(0.1).describe('Proportion for validation'),
  testDataRatio: z.number().min(0).max(1).optional().default(0.1).describe('Proportion for testing'),

  /** Data Filtering */
  dataFilter: z.string().optional().describe('Formula to filter training data'),
  minRecords: z.number().int().optional().default(100).describe('Minimum records required'),
  maxRecords: z.number().int().optional().describe('Maximum records to use'),

  /** Training Strategy */
  strategy: z.enum(['full', 'incremental', 'online', 'transfer_learning']).optional().default('full'),
  crossValidation: z.boolean().optional().default(true),
  folds: z.number().int().min(2).max(10).optional().default(5).describe('Cross-validation folds'),

  /** Early Stopping */
  earlyStoppingEnabled: z.boolean().optional().default(true),
  earlyStoppingPatience: z.number().int().optional().default(10).describe('Epochs without improvement before stopping'),

  /** Resource Limits */
  maxTrainingTime: z.number().optional().describe('Maximum training time in seconds'),
  gpuEnabled: z.boolean().optional().default(false),

  /** Reproducibility */
  randomSeed: z.number().int().optional().describe('Random seed for reproducibility'),
}).superRefine((data, ctx) => {
  const { trainingDataRatio, validationDataRatio, testDataRatio } = data;

  // Compare against undefined instead of relying on truthiness: 0 is a legal
  // ratio (min(0)), and a truthiness guard would skip the sum check whenever
  // any split is 0, letting splits such as 0.5 / 0 / 0.1 through unchecked.
  if (
    trainingDataRatio === undefined ||
    validationDataRatio === undefined ||
    testDataRatio === undefined
  ) {
    return;
  }

  const sum = trainingDataRatio + validationDataRatio + testDataRatio;

  // Tolerance absorbs floating-point rounding (e.g. 0.7 + 0.2 + 0.1).
  if (Math.abs(sum - 1) > 0.01) {
    ctx.addIssue({
      code: z.ZodIssueCode.custom,
      message: `Data split ratios must sum to 1. Current sum: ${sum}`,
      path: ['trainingDataRatio'],
    });
  }
});
// Metrics reported for classification models.
const classificationMetrics = {
  accuracy: z.number().optional(),
  precision: z.number().optional(),
  recall: z.number().optional(),
  f1Score: z.number().optional(),
  auc: z.number().optional().describe('Area Under ROC Curve'),
};

// Metrics reported for regression models.
const regressionMetrics = {
  mse: z.number().optional().describe('Mean Squared Error'),
  rmse: z.number().optional().describe('Root Mean Squared Error'),
  mae: z.number().optional().describe('Mean Absolute Error'),
  r2Score: z.number().optional().describe('R-squared score'),
};

// Metrics reported for clustering models.
const clusteringMetrics = {
  silhouetteScore: z.number().optional(),
  daviesBouldinIndex: z.number().optional(),
};

// Metrics reported for time-series models.
const timeSeriesMetrics = {
  mape: z.number().optional().describe('Mean Absolute Percentage Error'),
  smape: z.number().optional().describe('Symmetric MAPE'),
};

/**
 * Model Evaluation Metrics
 *
 * Optional numeric scores grouped by model family, plus a string-keyed
 * record of numbers for any further metrics.
 */
export const EvaluationMetricsSchema = z.object({
  ...classificationMetrics,
  ...regressionMetrics,
  ...clusteringMetrics,
  ...timeSeriesMetrics,
  // Additional metrics, keyed by name
  custom: z.record(z.string(), z.number()).optional(),
});
// Accepted shape of a model identifier: lowercase letters, digits and
// underscores, where the first character is not a digit.
const MODEL_NAME_PATTERN = /^[a-z_][a-z0-9_]*$/;

/**
 * Predictive Model Schema
 *
 * Full definition of a predictive model: identity, task type, prediction
 * target, input features, training setup, last recorded metrics, and the
 * deployment, retraining, monitoring and access settings.
 */
export const PredictiveModelSchema = z.object({
  // --- Identity ---
  name: z
    .string()
    .regex(MODEL_NAME_PATTERN)
    .describe('Model unique identifier (snake_case)'),
  label: z.string().describe('Model display name'),
  description: z.string().optional(),

  // --- Model type ---
  type: PredictiveModelTypeSchema,
  algorithm: z
    .string()
    .optional()
    .describe('Specific algorithm (e.g., "random_forest", "xgboost", "lstm")'),

  // --- Target object and field ---
  objectName: z.string().describe('Target object for predictions'),
  target: z.string().describe('Target field to predict'),
  targetType: z
    .enum(['numeric', 'categorical', 'binary'])
    .optional()
    .describe('Target field type'),

  // --- Features ---
  features: z
    .array(ModelFeatureSchema)
    .describe('Input features for the model'),

  // --- Hyperparameters ---
  hyperparameters: HyperparametersSchema.optional(),

  // --- Training configuration ---
  training: TrainingConfigSchema.optional(),

  // --- Model performance ---
  metrics: EvaluationMetricsSchema
    .optional()
    .describe('Evaluation metrics from last training'),

  // --- Deployment (a model starts as 'draft' at version '1.0.0') ---
  deploymentStatus: z
    .enum(['draft', 'training', 'trained', 'deployed', 'deprecated'])
    .optional()
    .default('draft'),
  version: z.string().optional().default('1.0.0'),

  // --- Prediction configuration ---
  predictionField: z
    .string()
    .optional()
    .describe('Field to store predictions'),
  confidenceField: z
    .string()
    .optional()
    .describe('Field to store confidence scores'),
  updateTrigger: z
    .enum(['on_create', 'on_update', 'manual', 'scheduled'])
    .optional()
    .default('on_create'),

  // --- Retraining (off unless enabled) ---
  autoRetrain: z.boolean().optional().default(false),
  retrainSchedule: z
    .string()
    .optional()
    .describe('Cron expression for auto-retraining'),
  retrainThreshold: z
    .number()
    .optional()
    .describe('Performance threshold to trigger retraining'),

  // --- Explainability (off unless enabled) ---
  enableExplainability: z
    .boolean()
    .optional()
    .default(false)
    .describe('Generate feature importance & explanations'),

  // --- Monitoring (on unless disabled) ---
  enableMonitoring: z.boolean().optional().default(true),
  alertOnDrift: z
    .boolean()
    .optional()
    .default(true)
    .describe('Alert when model drift is detected'),

  // --- Access control ---
  active: z.boolean().optional().default(true),
  owner: z.string().optional().describe('User ID of model owner'),
  permissions: z
    .array(z.string())
    .optional()
    .describe('User/group IDs with access'),

  // --- Metadata ---
  tags: z.array(z.string()).optional(),
  category: z
    .string()
    .optional()
    .describe('Model category (e.g., "sales", "marketing", "operations")'),
  lastTrainedAt: z.string().datetime().optional().describe('ISO timestamp'),
  createdAt: z.string().datetime().optional().describe('ISO timestamp'),
  updatedAt: z.string().datetime().optional().describe('ISO timestamp'),
});
/**
 * Prediction Request
 *
 * Payload asking a model for predictions. Only `modelName` is mandatory;
 * `recordIds` and `inputData` are both optional ways to supply the input.
 */
export const PredictionRequestSchema = z.object({
  // Which model to run
  modelName: z.string().describe('Model to use for prediction'),

  // Input selection
  recordIds: z
    .array(z.string())
    .optional()
    .describe('Specific records to predict (if not provided, uses all)'),
  inputData: z
    .record(z.string(), z.unknown())
    .optional()
    .describe('Direct input data (alternative to recordIds)'),

  // Response options: confidence defaults to on, explanation to off
  returnConfidence: z.boolean().optional().default(true),
  returnExplanation: z.boolean().optional().default(false),
});
// One feature entry inside a prediction explanation.
const explainedFeatureSchema = z.object({
  feature: z.string(),
  importance: z.number(),
  value: z.unknown(),
});

// Optional explanation attached to a prediction.
const predictionExplanationSchema = z.object({
  topFeatures: z.array(explainedFeatureSchema).optional(),
  reasoning: z.string().optional(),
});

// Optional execution metadata attached to a prediction.
const predictionMetadataSchema = z.object({
  executionTime: z
    .number()
    .optional()
    .describe('Execution time in milliseconds'),
  timestamp: z.string().datetime().optional().describe('ISO timestamp'),
});

/**
 * Prediction Result
 *
 * Outcome of a prediction request: the model and version used, the predicted
 * value, and optional confidence, class probabilities, explanation, token
 * usage, cost and execution metadata.
 */
export const PredictionResultSchema = z.object({
  // Model that produced the result
  modelName: z.string(),
  modelVersion: z.string(),
  recordId: z.string().optional(),

  // The prediction and its scores
  prediction: z.unknown().describe('The predicted value'),
  confidence: z.number().optional().describe('Confidence score (0-1)'),
  probabilities: z
    .record(z.string(), z.number())
    .optional()
    .describe('Class probabilities (for classification)'),

  explanation: predictionExplanationSchema.optional(),

  // Usage accounting
  tokens: TokenUsageSchema
    .optional()
    .describe('Token usage for this prediction (if AI-powered)'),
  cost: z
    .number()
    .nonnegative()
    .optional()
    .describe('Cost for this prediction in USD'),

  metadata: predictionMetadataSchema.optional(),
});
// Measurements attached to a drift record; only the score is mandatory.
const driftMetricsSchema = z.object({
  driftScore: z.number().describe('Drift magnitude (0-1)'),
  affectedFeatures: z.array(z.string()).optional(),
  performanceChange: z
    .number()
    .optional()
    .describe('Change in performance metric'),
});

/**
 * Model Drift Detection
 *
 * Record of a detected drift for a model: its kind, severity, detection
 * time, measurements, and whether an automatic retrain was triggered.
 */
export const ModelDriftSchema = z.object({
  modelName: z.string(),
  driftType: z.enum([
    'feature_drift',
    'prediction_drift',
    'performance_drift',
  ]),
  severity: z.enum(['low', 'medium', 'high', 'critical']),
  detectedAt: z.string().datetime().describe('ISO timestamp'),
  metrics: driftMetricsSchema,
  recommendation: z.string().optional(),
  // Treated as false when the flag is omitted
  autoRetrainTriggered: z.boolean().optional().default(false),
});
// Type exports
// Static TypeScript types derived from the schemas in this file via z.infer,
// so compile-time shapes stay in step with the runtime validators.
// NOTE(review): z.infer is expected to yield the parsed (output) shape, which
// would make fields declared with .default() non-optional in these types —
// confirm against the Zod type-inference docs.

/** One of the model type literals listed in PredictiveModelTypeSchema. */
export type PredictiveModelType = z.infer<typeof PredictiveModelTypeSchema>;
/** Shape of a single model input feature. */
export type ModelFeature = z.infer<typeof ModelFeatureSchema>;
/** Shape of the hyperparameter configuration. */
export type Hyperparameters = z.infer<typeof HyperparametersSchema>;
/** Shape of the training configuration. */
export type TrainingConfig = z.infer<typeof TrainingConfigSchema>;
/** Shape of the evaluation metrics record. */
export type EvaluationMetrics = z.infer<typeof EvaluationMetricsSchema>;
/** Shape of a complete predictive model definition. */
export type PredictiveModel = z.infer<typeof PredictiveModelSchema>;
/** Shape of a prediction request. */
export type PredictionRequest = z.infer<typeof PredictionRequestSchema>;
/** Shape of a prediction result. */
export type PredictionResult = z.infer<typeof PredictionResultSchema>;
/** Shape of a model drift record. */
export type ModelDrift = z.infer<typeof ModelDriftSchema>;