|
4 | 4 |
|
5 | 5 | import hashlib |
6 | 6 | import logging |
7 | | -from typing import Any, Dict, Tuple |
| 7 | +import re |
| 8 | +from typing import Any, Dict, List, Tuple |
8 | 9 |
|
9 | 10 | from atlas.learning.usage import get_tracker |
10 | 11 | from atlas.runtime.orchestration.execution_context import ExecutionContext |
@@ -100,3 +101,125 @@ def resolve_playbook( |
100 | 101 | } |
101 | 102 |
|
102 | 103 | return trimmed, digest, metadata |
| 104 | + |
| 105 | + |
| 106 | +def extract_few_shot_examples( |
| 107 | + metadata: Dict[str, Any] | None, |
| 108 | + role: str, |
| 109 | + *, |
| 110 | + max_tokens: int = 500, |
| 111 | + redaction_patterns: List[str] | None = None, |
| 112 | + chars_per_token: float = 3.5, |
| 113 | + max_entries: int = 10, |
| 114 | + max_examples_per_block: int = 2, |
| 115 | +) -> str | None: |
| 116 | + """Extract and format few-shot examples from learning_usage. |
| 117 | +
|
| 118 | + Args: |
| 119 | + metadata: Playbook metadata dictionary (not currently used for example extraction) |
| 120 | + role: Either "student" or "teacher" to select which examples to extract |
| 121 | + max_tokens: Approximate token budget for few-shot examples |
| 122 | + redaction_patterns: List of regex patterns for redacting sensitive data |
| 123 | + chars_per_token: Conservative multiplier for token-to-char conversion (default 3.5) |
| 124 | + max_entries: Maximum number of learning entries to process (default 10) |
| 125 | + max_examples_per_block: Maximum examples per cue/adoption block (default 2) |
| 126 | +
|
| 127 | + Returns: |
| 128 | + Formatted few-shot examples string, or None if no examples available |
| 129 | +
|
| 130 | + Example output: |
| 131 | + >>> Few-Shot Examples >>> |
| 132 | + Entry abc123: |
| 133 | + Cue examples: |
| 134 | + 1. investigating latency issues |
| 135 | + Action examples: |
| 136 | + 1. metrics.query -> success |
| 137 | + >>> End Few-Shot Examples >>> |
| 138 | + """ |
| 139 | + |
| 140 | + if not isinstance(metadata, dict): |
| 141 | + return None |
| 142 | + if max_tokens <= 0: |
| 143 | + return None |
| 144 | + |
| 145 | + try: |
| 146 | + context = ExecutionContext.get() |
| 147 | + learning_usage = context.metadata.get("learning_usage", {}) |
| 148 | + except Exception: |
| 149 | + return None |
| 150 | + |
| 151 | + if not isinstance(learning_usage, dict): |
| 152 | + return None |
| 153 | + |
| 154 | + role_usage = learning_usage.get("roles", {}).get(role, {}) |
| 155 | + if not isinstance(role_usage, dict): |
| 156 | + return None |
| 157 | + |
| 158 | + examples_blocks: List[str] = [] |
| 159 | + char_budget = int(max_tokens * chars_per_token) # Conservative estimate with safety margin |
| 160 | + char_used = 0 |
| 161 | + entries_processed = 0 |
| 162 | + |
| 163 | + for entry_id, entry_data in role_usage.items(): |
| 164 | + if entries_processed >= max_entries: |
| 165 | + break |
| 166 | + |
| 167 | + if not isinstance(entry_data, dict): |
| 168 | + continue |
| 169 | + |
| 170 | + cue_examples = entry_data.get("cue_examples", []) or [] |
| 171 | + adoption_examples = entry_data.get("adoption_examples", []) or [] |
| 172 | + if not cue_examples and not adoption_examples: |
| 173 | + continue |
| 174 | + |
| 175 | + block_lines: List[str] = [f"Entry {entry_id}:"] |
| 176 | + |
| 177 | + if cue_examples: |
| 178 | + block_lines.append(" Cue examples:") |
| 179 | + for idx, example in enumerate(cue_examples[:max_examples_per_block], start=1): |
| 180 | + redacted = _redact_sensitive_data(str(example), redaction_patterns) |
| 181 | + block_lines.append(f" {idx}. {redacted}") |
| 182 | + |
| 183 | + if adoption_examples: |
| 184 | + block_lines.append(" Action examples:") |
| 185 | + for idx, example in enumerate(adoption_examples[:max_examples_per_block], start=1): |
| 186 | + if isinstance(example, dict): |
| 187 | + tool = example.get("tool_name") or example.get("runtime_handle") or "unknown" |
| 188 | + status = example.get("status") or ("success" if example.get("success") else "unknown") |
| 189 | + block_lines.append(f" {idx}. {tool} -> {status}") |
| 190 | + else: |
| 191 | + redacted = _redact_sensitive_data(str(example), redaction_patterns) |
| 192 | + block_lines.append(f" {idx}. {redacted}") |
| 193 | + |
| 194 | + block_text = "\n".join(block_lines).strip() |
| 195 | + block_chars = len(block_text) |
| 196 | + if char_used + block_chars > char_budget: |
| 197 | + break |
| 198 | + |
| 199 | + examples_blocks.append(block_text) |
| 200 | + char_used += block_chars |
| 201 | + entries_processed += 1 |
| 202 | + |
| 203 | + if not examples_blocks: |
| 204 | + return None |
| 205 | + |
| 206 | + # Use list + join pattern for better performance |
| 207 | + parts = [ |
| 208 | + ">>> Few-Shot Examples >>>", |
| 209 | + "\n\n".join(examples_blocks), |
| 210 | + ">>> End Few-Shot Examples >>>", |
| 211 | + ] |
| 212 | + return "\n".join(parts) |
| 213 | + |
| 214 | + |
| 215 | +def _redact_sensitive_data(text: str, patterns: List[str] | None) -> str: |
| 216 | + """Redact sensitive data using regex patterns.""" |
| 217 | + if not patterns: |
| 218 | + return text |
| 219 | + redacted = text |
| 220 | + for pattern in patterns: |
| 221 | + try: |
| 222 | + redacted = re.sub(pattern, "[REDACTED]", redacted, flags=re.IGNORECASE) |
| 223 | + except re.error: |
| 224 | + continue |
| 225 | + return redacted |
0 commit comments