Skip to content

Commit 8b43ae7

Browse files
nvidiaruvnet
andcommitted
fix(merge): surgical integration of upstream volcengine#1159 — 4 files
The initial merge of upstream 'Feature/memory opt' (volcengine#1159) auto-merged a unified operations schema into utils/uri.py and utils/content.py while our TAKE DEV choice left memory_updater.py / extract_loop.py expecting the old split schema. Runtime smoke test revealed cascading API mismatches: - 'ResolvedOperations' has no 'write_operations' (AttributeError) - serialize_with_metadata() arg order mismatch (TypeError on write) - LLM output validation would fail on missing reasoning/edit_overview_uris fields (upstream commented them out in schema_model_generator) Two parallel analysis agents (arch map of volcengine#1159 + call-graph audit) both converged on: TAKE DEV for schema-breaking changes, KEEP upstream for purely-additive improvements. This commit is the resulting minimal diff. Reverted to dev (schema-breaking bits): - utils/uri.py: ResolvedOperations keeps write_operations/edit_operations split (load-bearing for brain-hardening _MERGE_ON_COLLISION_TYPES) - utils/content.py: serialize_with_metadata(content, metadata) positional signature matches all 5 call sites (memory_updater x2, memory_type_registry x1, content_write x2 — upstream's new content_write also uses this form) - schema_model_generator.py: uncommented 'reasoning' and 'edit_overview_uris' Pydantic field definitions — extract_loop.py:196 requires both Patched to bridge upstream additive + dev signature: - memory_type_registry.py::initialize_memory_files — render content_template inline with jinja2, then call dev's serialize_with_metadata(rendered, metadata) instead of upstream's serialize_with_metadata(metadata, content_template=, extract_context=) Kept upstream wins untouched (already in merged state, fully additive): - MemoryTypeRegistry.initialize_memory_files() method (volcengine#1159) - MemoryTypeRegistry.__init__(load_schemas=True) auto-loading - ExtractContext.get_year/get_month/get_day helpers (used by events.yaml) - openviking/telemetry/tracer.py module - compressor_v2 v2_lock_retry config + tracer (volcengine#1275) - merge_op/base.py get_first_replace() helper - merge_op/patch.py None-handling branch - MemoryField.init_value - soul.yaml / identity.yaml prompt templates - events.yaml year/month/day folder structure - AGFS Rust binding (volcengine#1221) — confirmed active at runtime Smoke test result (conv-26 session_1, qwopus-4b, temp=0.1): Written: 3, Edited: 0, Deleted: 0, Errors: 0 4 memories total (2 entities, 1 episode, 1 other) Historical baseline variance range: 4-10 memories, median ~6. Result within normal LLM variance. Co-Authored-By: claude-flow <ruv@ruv.net>
1 parent 0313068 commit 8b43ae7

4 files changed

Lines changed: 108 additions & 92 deletions

File tree

openviking/session/memory/memory_type_registry.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -252,19 +252,26 @@ async def initialize_memory_files(self, ctx: Any) -> None:
252252
except Exception:
253253
pass
254254

255-
# Add MEMORY_FIELDS comment with field metadata
256-
# Template rendering is handled inside serialize_with_metadata
255+
# Render content_template inline (dev's serialize_with_metadata takes
256+
# content as a positional arg and does not do template rendering itself).
257257
from openviking.session.memory.utils.content import serialize_with_metadata
258258

259+
try:
260+
rendered_content = env.from_string(schema.content_template).render(
261+
**fields_with_init
262+
)
263+
except Exception as e:
264+
logger.warning(
265+
f"[MemoryTypeRegistry] Failed to render content template for "
266+
f"{schema.memory_type}: {e}"
267+
)
268+
rendered_content = ""
269+
259270
metadata = {
260271
"memory_type": schema.memory_type,
261272
**fields_with_init,
262-
"content": "", # content will come from content_template rendering
263273
}
264-
full_content = serialize_with_metadata(
265-
metadata,
266-
content_template=schema.content_template,
267-
)
274+
full_content = serialize_with_metadata(rendered_content, metadata)
268275

269276
# Write the file
270277
try:

openviking/session/memory/schema_model_generator.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,10 @@ def create_structured_operations_model(self) -> Type[BaseModel]:
242242
# Build field definitions for each memory_type
243243
field_definitions: Dict[str, Tuple[Type[Any], Any]] = {}
244244

245-
# field_definitions["reasoning"] = (
246-
# str,
247-
# Field("", description="reasoning"),
248-
# )
245+
field_definitions["reasoning"] = (
246+
str,
247+
Field("", description="reasoning"),
248+
)
249249

250250
for mt in enabled_memory_types:
251251
flat_model = self.create_flat_data_model(mt)
@@ -267,17 +267,17 @@ def create_structured_operations_model(self) -> Type[BaseModel]:
267267
)
268268

269269
# Use single generic model for overview edit (same for all memory types)
270-
# generic_overview_edit = self.create_overview_edit_model(
271-
# enabled_memory_types[0] if enabled_memory_types else None
272-
# )
273-
274-
# field_definitions["edit_overview_uris"] = (
275-
# List[generic_overview_edit], # type: ignore
276-
# Field(
277-
# default_factory=list,
278-
# description="Edit operations for .overview.md files using memory_type",
279-
# ),
280-
# )
270+
generic_overview_edit = self.create_overview_edit_model(
271+
enabled_memory_types[0] if enabled_memory_types else None
272+
)
273+
274+
field_definitions["edit_overview_uris"] = (
275+
List[generic_overview_edit], # type: ignore
276+
Field(
277+
default_factory=list,
278+
description="Edit operations for .overview.md files using memory_type",
279+
),
280+
)
281281

282282
field_definitions["delete_uris"] = (
283283
List[str],
@@ -330,7 +330,7 @@ def to_legacy_operations(self) -> Dict[str, Any]:
330330
return {
331331
"write_uris": write_uris,
332332
"edit_uris": edit_uris,
333-
#"edit_overview_uris": self.edit_overview_uris,
333+
"edit_overview_uris": self.edit_overview_uris,
334334
"delete_uris": self.delete_uris,
335335
}
336336

openviking/session/memory/utils/content.py

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -39,47 +39,27 @@ def _deserialize_datetime(metadata: Dict[str, Any]) -> Dict[str, Any]:
3939
return result
4040

4141

42-
def serialize_with_metadata(
43-
metadata: Dict[str, Any],
44-
content_template: str = None,
45-
extract_context: Any = None,
46-
) -> str:
42+
def serialize_with_metadata(content: str, metadata: Dict[str, Any]) -> str:
4743
"""
4844
Serialize content and metadata into a single string.
4945
5046
The metadata is stored in an HTML comment at the end of the content.
5147
5248
Args:
53-
metadata: Dictionary containing all fields including "content".
54-
content is extracted and used as the main body.
55-
content_template: Optional Jinja2 template to render content.
56-
extract_context: Optional context for template rendering.
49+
content: The main memory content (Markdown)
50+
metadata: Dictionary containing metadata fields:
51+
- memory_type: Type of memory (NOT included in output)
52+
- fields: Structured fields (for template mode)
53+
- name: Memory name
54+
- tags: List of tags
55+
- created_at: Creation datetime
56+
- updated_at: Update datetime
57+
- abstract: L0 abstract
58+
- overview: L1 overview
5759
5860
Returns:
5961
Combined string with content followed by metadata in HTML comment
6062
"""
61-
# Extract content from metadata (default to empty string)
62-
content = metadata.pop("content", "") or ""
63-
64-
# Render template if provided
65-
if content_template:
66-
try:
67-
import jinja2
68-
from jinja2 import Environment
69-
70-
env = Environment(autoescape=False, undefined=jinja2.DebugUndefined)
71-
template_vars = metadata.copy()
72-
template_vars["extract_context"] = extract_context
73-
74-
jinja_template = env.from_string(content_template)
75-
content = jinja_template.render(**template_vars).strip()
76-
except Exception:
77-
# If template rendering fails, use content as-is
78-
pass
79-
80-
# Restore metadata (we popped content earlier)
81-
# Note: metadata dict is modified in place, caller should be aware
82-
8363
# Clean metadata - remove None values and memory_type
8464
clean_metadata = {k: v for k, v in metadata.items() if v is not None and k != "memory_type"}
8565

openviking/session/memory/utils/uri.py

Lines changed: 67 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,12 @@ def generate_uri(
159159
if not uri_template:
160160
raise ValueError("Memory type has neither directory nor filename_template")
161161

162-
# Build the context for Jinja2 rendering - include user_space and agent_space
162+
# Build the context for Jinja2 rendering
163163
context = {
164164
"user_space": user_space,
165165
"agent_space": agent_space,
166166
}
167-
# Add all fields to context (uri_fields with actual values)
167+
# Add all fields to context
168168
context.update(fields)
169169

170170
# Render using unified render_template method (same as content_template)
@@ -336,7 +336,6 @@ def is_uri_allowed_for_schema(
336336
schemas: List[MemoryTypeSchema],
337337
user_space: str = "default",
338338
agent_space: str = "default",
339-
extract_context: Any = None,
340339
) -> bool:
341340
"""
342341
Check if a URI is allowed for the given activated schemas.
@@ -346,15 +345,12 @@ def is_uri_allowed_for_schema(
346345
schemas: List of activated memory type schemas
347346
user_space: User space to substitute for {{ user_space }}
348347
agent_space: Agent space to substitute for {{ agent_space }}
349-
extract_context: ExtractContext instance for template rendering
350348
351349
Returns:
352350
True if the URI is allowed
353351
"""
354352
allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context)
355-
allowed_patterns = collect_allowed_path_patterns(
356-
schemas, user_space, agent_space, extract_context
357-
)
353+
allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context)
358354
return is_uri_allowed(uri, allowed_dirs, allowed_patterns)
359355

360356

@@ -504,8 +500,8 @@ class ResolvedOperations:
504500
"""Operations with resolved URIs."""
505501

506502
def __init__(self):
507-
# Unified operations list - all are edit (will read existing file first)
508-
self.operations: List[ResolvedOperation] = []
503+
self.write_operations: List[ResolvedOperation] = []
504+
self.edit_operations: List[ResolvedOperation] = []
509505
self.edit_overview_operations: List[
510506
Tuple[Any, str]
511507
] = [] # (overview_edit_model, overview_uri)
@@ -526,9 +522,7 @@ def resolve_all_operations(
526522
"""
527523
Resolve URIs for all operations.
528524
529-
Uses per-memory_type format (e.g., soul, identity fields).
530-
All operations are unified into a single list - each will attempt to read existing
531-
file first, then merge (or write new if not exists).
525+
Supports both legacy format (write_uris/edit_uris) and new per-memory_type format.
532526
533527
Args:
534528
operations: StructuredMemoryOperations
@@ -552,32 +546,67 @@ def resolve_all_operations(
552546
continue
553547
items = value if isinstance(value, list) else [value]
554548
for item in items:
549+
# Determine if edit (has uri) or write
550+
is_edit = False
551+
if hasattr(item, "uri") and item.uri:
552+
is_edit = True
553+
elif isinstance(item, dict) and item.get("uri"):
554+
is_edit = True
555555
# Convert to dict for URI resolution
556556
item_dict = dict(item) if hasattr(item, "model_dump") else dict(item)
557557
try:
558558
uri = resolve_flat_model_uri(
559-
item_dict,
560-
registry,
561-
user_space,
562-
agent_space,
563-
memory_type=field_name,
564-
extract_context=extract_context,
565-
)
566-
# All operations go to unified list - will read existing file first
567-
resolved.operations.append(
568-
ResolvedOperation(model=item_dict, uri=uri, memory_type=field_name)
559+
item_dict, registry, user_space, agent_space,
560+
memory_type=field_name, extract_context=extract_context
569561
)
562+
if is_edit:
563+
resolved.edit_operations.append(
564+
ResolvedOperation(model=item_dict, uri=uri, memory_type=field_name)
565+
)
566+
else:
567+
resolved.write_operations.append(
568+
ResolvedOperation(model=item_dict, uri=uri, memory_type=field_name)
569+
)
570570
except Exception as e:
571571
resolved.errors.append(f"Failed to resolve {field_name} operation: {e}")
572+
else:
573+
# Legacy format
574+
write_uris = operations.write_uris if hasattr(operations, "write_uris") else []
575+
edit_uris = operations.edit_uris if hasattr(operations, "edit_uris") else []
576+
577+
for op in write_uris:
578+
try:
579+
uri = resolve_flat_model_uri(
580+
op, registry, user_space, agent_space, extract_context=extract_context
581+
)
582+
# Legacy format: try to get memory_type from model, otherwise empty
583+
memory_type = op.get("memory_type", "") if isinstance(op, dict) else ""
584+
resolved.write_operations.append(
585+
ResolvedOperation(model=op, uri=uri, memory_type=memory_type)
586+
)
587+
except Exception as e:
588+
resolved.errors.append(f"Failed to resolve write operation: {e}")
589+
590+
for op in edit_uris:
591+
try:
592+
uri = resolve_flat_model_uri(
593+
op, registry, user_space, agent_space, extract_context=extract_context
594+
)
595+
memory_type = op.get("memory_type", "") if isinstance(op, dict) else ""
596+
resolved.edit_operations.append(
597+
ResolvedOperation(model=op, uri=uri, memory_type=memory_type)
598+
)
599+
except Exception as e:
600+
resolved.errors.append(f"Failed to resolve edit operation: {e}")
572601

573602
# Resolve edit_overview operations (overview edit models)
574-
# if hasattr(operations, "edit_overview_uris"):
575-
# for op in operations.edit_overview_uris:
576-
# try:
577-
# uri = resolve_overview_edit_uri(op, registry, user_space, agent_space)
578-
# resolved.edit_overview_operations.append((op, uri))
579-
# except Exception as e:
580-
# resolved.errors.append(f"Failed to resolve edit_overview operation: {e}")
603+
if hasattr(operations, "edit_overview_uris"):
604+
for op in operations.edit_overview_uris:
605+
try:
606+
uri = resolve_overview_edit_uri(op, registry, user_space, agent_space)
607+
resolved.edit_overview_operations.append((op, uri))
608+
except Exception as e:
609+
resolved.errors.append(f"Failed to resolve edit_overview operation: {e}")
581610

582611
# Resolve delete operations (already URI strings)
583612
if hasattr(operations, "delete_uris"):
@@ -614,24 +643,24 @@ def validate_operations_uris(
614643
Tuple of (is_valid, list of error messages)
615644
"""
616645
allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context)
617-
allowed_patterns = collect_allowed_path_patterns(
618-
schemas, user_space, agent_space, extract_context
619-
)
646+
allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context)
620647

621648
errors = []
622649

623650
# First resolve all URIs
624-
resolved = resolve_all_operations(
625-
operations, registry, user_space, agent_space, extract_context
626-
)
651+
resolved = resolve_all_operations(operations, registry, user_space, agent_space, extract_context)
627652

628653
if resolved.has_errors():
629654
errors.extend(resolved.errors)
630655
else:
631-
# Validate resolved URIs - all operations use unified list
632-
for resolved_op in resolved.operations:
656+
# Validate resolved URIs
657+
for resolved_op in resolved.write_operations:
658+
if not is_uri_allowed(resolved_op.uri, allowed_dirs, allowed_patterns):
659+
errors.append(f"Write operation URI not allowed: {resolved_op.uri}")
660+
661+
for resolved_op in resolved.edit_operations:
633662
if not is_uri_allowed(resolved_op.uri, allowed_dirs, allowed_patterns):
634-
errors.append(f"Operation URI not allowed: {resolved_op.uri}")
663+
errors.append(f"Edit operation URI not allowed: {resolved_op.uri}")
635664

636665
for _op, uri in resolved.edit_overview_operations:
637666
if not is_uri_allowed(uri, allowed_dirs, allowed_patterns):

0 commit comments

Comments
 (0)