From db29b960b28398f6bdd5510de9eb7dec982c3a6d Mon Sep 17 00:00:00 2001 From: Ashish-dwi99 Date: Mon, 9 Feb 2026 14:38:49 +0530 Subject: [PATCH 1/2] v2 --- engram/configs/base.py | 23 +++ engram/core/profile.py | 441 +++++++++++++++++++++++++++++++++++++++++ engram/core/scene.py | 355 +++++++++++++++++++++++++++++++++ engram/db/sqlite.py | 331 +++++++++++++++++++++++++++++++ engram/mcp_server.py | 220 ++++++++++++++++++++ engram/memory/main.py | 197 ++++++++++++++++++ tests/__init__.py | 0 7 files changed, 1567 insertions(+) create mode 100644 engram/core/profile.py create mode 100644 engram/core/scene.py create mode 100644 tests/__init__.py diff --git a/engram/configs/base.py b/engram/configs/base.py index 19e6b48..3ed3978 100644 --- a/engram/configs/base.py +++ b/engram/configs/base.py @@ -93,6 +93,27 @@ class CategoryMemConfig(BaseModel): auto_create_subcategories: bool = True # Allow dynamic subcategory creation +class SceneConfig(BaseModel): + """Configuration for episodic scene grouping.""" + enable_scenes: bool = True + scene_time_gap_minutes: int = 30 # gap > this = new scene + scene_topic_threshold: float = 0.55 # cosine sim below this = topic shift + auto_close_inactive_minutes: int = 120 + max_scene_memories: int = 50 + use_llm_summarization: bool = True + summary_regenerate_threshold: int = 5 + + +class ProfileConfig(BaseModel): + """Configuration for character profile tracking.""" + enable_profiles: bool = True + auto_detect_profiles: bool = True + use_llm_extraction: bool = True + narrative_regenerate_threshold: int = 10 + self_profile_auto_create: bool = True + max_facts_per_profile: int = 100 + + class ScopeConfig(BaseModel): """Configuration for scope-aware sharing weights.""" agent_weight: float = 1.0 @@ -137,3 +158,5 @@ class MemoryConfig(BaseModel): category: CategoryMemConfig = Field(default_factory=CategoryMemConfig) scope: ScopeConfig = Field(default_factory=ScopeConfig) graph: KnowledgeGraphConfig = Field(default_factory=lambda: 
KnowledgeGraphConfig()) + scene: SceneConfig = Field(default_factory=SceneConfig) + profile: ProfileConfig = Field(default_factory=ProfileConfig) diff --git a/engram/core/profile.py b/engram/core/profile.py new file mode 100644 index 0000000..45c64f7 --- /dev/null +++ b/engram/core/profile.py @@ -0,0 +1,441 @@ +""" +Character Profile Processor for engram. + +Tracks people and entities mentioned in memories, building rich profiles: +- Facts, preferences, relationships +- Sentiment tracking +- LLM-generated narrative summaries +- Self-profile auto-creation (for "I prefer...", "my name is...") +- Fuzzy name matching via aliases +""" + +from __future__ import annotations + +import json +import logging +import re +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Set + +logger = logging.getLogger(__name__) + + +@dataclass +class ProfileUpdate: + """A detected update to apply to a profile.""" + profile_name: str + profile_type: str = "contact" # self | contact | entity + new_facts: List[str] = field(default_factory=list) + new_preferences: List[str] = field(default_factory=list) + new_relationships: List[Dict[str, str]] = field(default_factory=list) + sentiment: Optional[str] = None + is_new: bool = False + + +# Patterns for self-referential statements +_SELF_PATTERNS = [ + re.compile(r"\b(?:I|my|me)\s+(?:prefer|like|love|use|want|need|enjoy|hate|dislike)\b", re.IGNORECASE), + re.compile(r"\bmy\s+(?:name|email|job|role|title|team|company|favorite|preferred)\b", re.IGNORECASE), + re.compile(r"\bI(?:'m| am)\s+(?:a|an|the)\s+", re.IGNORECASE), + re.compile(r"\bcall me\b", re.IGNORECASE), +] + +# Patterns for third-person mentions +_PERSON_PATTERN = re.compile( + r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b" # Two+ capitalized words = likely a name +) + +_PREFERENCE_EXTRACT = re.compile( + r"(?:I|my)\s+(?:prefer|like|love|use|want|enjoy|favorite)\s+(.+?)(?:\.|,|$)", + re.IGNORECASE, +) + 
+_NAME_EXTRACT = re.compile( + r"(?:my name is|call me|I'm|I am)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", + re.IGNORECASE, +) + + +def _cosine_similarity(a: List[float], b: List[float]) -> float: + if not a or not b or len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b)) + norm_a = sum(x * x for x in a) ** 0.5 + norm_b = sum(x * x for x in b) ** 0.5 + if norm_a == 0 or norm_b == 0: + return 0.0 + return dot / (norm_a * norm_b) + + +class ProfileProcessor: + """Manages character profile detection, creation, and updates.""" + + def __init__( + self, + db, + embedder=None, + llm=None, + config: Optional[Dict[str, Any]] = None, + ): + self.db = db + self.embedder = embedder + self.llm = llm + cfg = config or {} + self.auto_detect = cfg.get("auto_detect_profiles", True) + self.use_llm_extraction = cfg.get("use_llm_extraction", True) + self.narrative_regen_threshold = cfg.get("narrative_regenerate_threshold", 10) + self.self_auto_create = cfg.get("self_profile_auto_create", True) + self.max_facts = cfg.get("max_facts_per_profile", 100) + # Track updates since last narrative regeneration + self._update_counts: Dict[str, int] = {} + + # ------------------------------------------------------------------ + # Extraction + # ------------------------------------------------------------------ + + def extract_profile_mentions( + self, + content: str, + metadata: Optional[Dict[str, Any]] = None, + user_id: Optional[str] = None, + ) -> List[ProfileUpdate]: + """Extract profile mentions from memory content.""" + updates: List[ProfileUpdate] = [] + + # Self-profile updates + is_self_ref = any(p.search(content) for p in _SELF_PATTERNS) + if is_self_ref: + update = ProfileUpdate( + profile_name="self", + profile_type="self", + ) + # Extract preferences + for match in _PREFERENCE_EXTRACT.finditer(content): + pref = match.group(1).strip() + if pref: + update.new_preferences.append(pref) + + # Extract name + name_match = _NAME_EXTRACT.search(content) + if name_match: + 
update.new_facts.append(f"Name: {name_match.group(1)}") + + # General self-facts + if not update.new_preferences and not update.new_facts: + update.new_facts.append(content.strip()) + + updates.append(update) + + # Third-person mentions + if self.auto_detect: + seen_names: Set[str] = set() + for match in _PERSON_PATTERN.finditer(content): + name = match.group(1).strip() + # Filter out common false positives + if name.lower() in {"the user", "the system", "the app", "the team"}: + continue + if name not in seen_names: + seen_names.add(name) + update = ProfileUpdate( + profile_name=name, + profile_type="contact", + new_facts=[content.strip()], + ) + updates.append(update) + + # LLM extraction for richer profiles + if self.use_llm_extraction and self.llm and not updates: + llm_updates = self._extract_with_llm(content) + updates.extend(llm_updates) + + return updates + + def _extract_with_llm(self, content: str) -> List[ProfileUpdate]: + """Use LLM to extract person mentions and facts.""" + prompt = ( + "Extract any people or entities mentioned in the following text. 
" + "Return a JSON array of objects with fields: " + '"name" (string), "type" ("self"|"contact"|"entity"), ' + '"facts" (array of strings), "preferences" (array of strings).\n' + "If no people are mentioned, return an empty array.\n\n" + f"Text: {content}\n\nJSON:" + ) + try: + response = self.llm.generate(prompt) + json_match = re.search(r"\[.*\]", response, re.DOTALL) + if json_match: + data = json.loads(json_match.group()) + updates = [] + for item in data: + name = item.get("name", "").strip() + if name: + updates.append(ProfileUpdate( + profile_name=name, + profile_type=item.get("type", "contact"), + new_facts=item.get("facts", []), + new_preferences=item.get("preferences", []), + )) + return updates + except Exception as e: + logger.warning(f"LLM profile extraction failed: {e}") + return [] + + # ------------------------------------------------------------------ + # Profile lifecycle + # ------------------------------------------------------------------ + + def ensure_self_profile(self, user_id: str) -> Dict[str, Any]: + """Create or return the self-profile for a user.""" + existing = self.db.get_profile_by_name("self", user_id=user_id) + if existing: + return existing + + profile_id = str(uuid.uuid4()) + profile_data = { + "id": profile_id, + "user_id": user_id, + "name": "self", + "profile_type": "self", + "narrative": "The user's self-profile. Updated automatically from first-person statements.", + "facts": [], + "preferences": [], + "relationships": [], + } + self.db.add_profile(profile_data) + return profile_data + + def apply_update( + self, + profile_update: ProfileUpdate, + memory_id: str, + user_id: str, + ) -> str: + """Apply a ProfileUpdate to an existing or new profile. 
Returns profile_id.""" + name = profile_update.profile_name + + # Find existing profile + if name == "self" or profile_update.profile_type == "self": + profile = self.db.get_profile_by_name("self", user_id=user_id) + if not profile and self.self_auto_create: + profile = self.ensure_self_profile(user_id) + else: + profile = self._find_profile(name, user_id) + + if profile: + profile_id = profile["id"] + self._merge_into_profile(profile, profile_update) + else: + # Create new profile + profile_id = str(uuid.uuid4()) + embedding = None + if self.embedder: + embedding = self.embedder.embed(name, memory_action="add") + profile_data = { + "id": profile_id, + "user_id": user_id, + "name": name, + "profile_type": profile_update.profile_type, + "facts": profile_update.new_facts[:self.max_facts], + "preferences": profile_update.new_preferences, + "relationships": profile_update.new_relationships, + "sentiment": profile_update.sentiment, + "embedding": embedding, + } + self.db.add_profile(profile_data) + + # Link memory + role = "about" if profile_update.profile_type == "self" else "mentioned" + self.db.add_profile_memory(profile_id, memory_id, role=role) + + # Track updates for narrative regeneration + count = self._update_counts.get(profile_id, 0) + 1 + self._update_counts[profile_id] = count + if count >= self.narrative_regen_threshold: + self._regenerate_narrative(profile_id) + self._update_counts[profile_id] = 0 + + return profile_id + + def _find_profile(self, name: str, user_id: str) -> Optional[Dict[str, Any]]: + """Find a profile by name or alias, with fuzzy matching.""" + # Exact match first + profile = self.db.get_profile_by_name(name, user_id=user_id) + if profile: + return profile + + # Check all profiles for partial match + all_profiles = self.db.get_all_profiles(user_id=user_id) + name_lower = name.lower() + for p in all_profiles: + p_name = p["name"].lower() + aliases = [a.lower() for a in p.get("aliases", [])] + # Substring match (e.g. 
"John" matches "John Smith") + if name_lower in p_name or p_name in name_lower: + return p + if any(name_lower in a or a in name_lower for a in aliases): + return p + + return None + + def _merge_into_profile( + self, profile: Dict[str, Any], update: ProfileUpdate + ) -> None: + """Merge new facts/preferences into an existing profile.""" + changes: Dict[str, Any] = {} + + # Merge facts (deduplicate) + existing_facts = list(profile.get("facts", [])) + existing_set = {f.lower() for f in existing_facts} + for fact in update.new_facts: + if fact.lower() not in existing_set and len(existing_facts) < self.max_facts: + existing_facts.append(fact) + existing_set.add(fact.lower()) + if len(existing_facts) != len(profile.get("facts", [])): + changes["facts"] = existing_facts + + # Merge preferences + existing_prefs = list(profile.get("preferences", [])) + existing_pref_set = {p.lower() for p in existing_prefs} + for pref in update.new_preferences: + if pref.lower() not in existing_pref_set: + existing_prefs.append(pref) + existing_pref_set.add(pref.lower()) + if len(existing_prefs) != len(profile.get("preferences", [])): + changes["preferences"] = existing_prefs + + # Merge relationships + existing_rels = list(profile.get("relationships", [])) + for rel in update.new_relationships: + if rel not in existing_rels: + existing_rels.append(rel) + if len(existing_rels) != len(profile.get("relationships", [])): + changes["relationships"] = existing_rels + + # Update sentiment + if update.sentiment: + changes["sentiment"] = update.sentiment + + # Add name as alias if different from profile name + if ( + update.profile_name != profile["name"] + and update.profile_name.lower() != "self" + ): + aliases = list(profile.get("aliases", [])) + if update.profile_name not in aliases: + aliases.append(update.profile_name) + changes["aliases"] = aliases + + if changes: + self.db.update_profile(profile["id"], changes) + + # ------------------------------------------------------------------ + # 
Narrative + # ------------------------------------------------------------------ + + def _regenerate_narrative(self, profile_id: str) -> None: + """Regenerate the narrative summary for a profile.""" + if not self.llm: + return + + profile = self.db.get_profile(profile_id) + if not profile: + return + + memories = self.db.get_profile_memories(profile_id) + narrative = self.generate_narrative(profile, memories) + if narrative: + self.db.update_profile(profile_id, {"narrative": narrative}) + + def generate_narrative( + self, + profile: Dict[str, Any], + memories: Optional[List[Dict[str, Any]]] = None, + ) -> Optional[str]: + """Generate a narrative summary for a profile.""" + if not self.llm: + return None + + name = profile.get("name", "Unknown") + p_type = profile.get("profile_type", "contact") + facts = profile.get("facts", [])[:20] + prefs = profile.get("preferences", [])[:10] + + facts_text = "\n".join(f"- {f}" for f in facts) if facts else "None" + prefs_text = "\n".join(f"- {p}" for p in prefs) if prefs else "None" + + memory_texts = "" + if memories: + texts = [m.get("memory", "") for m in memories[:10] if m.get("memory")] + memory_texts = "\n".join(f"- {t}" for t in texts) + + if p_type == "self": + prompt = ( + "Write a concise first-person profile summary (2-3 sentences) based on:\n\n" + f"Known facts:\n{facts_text}\n\n" + f"Preferences:\n{prefs_text}\n\n" + f"Recent memories:\n{memory_texts}\n\n" + "Summary:" + ) + else: + prompt = ( + f"Write a concise profile summary (2-3 sentences) about {name} based on:\n\n" + f"Known facts:\n{facts_text}\n\n" + f"Preferences:\n{prefs_text}\n\n" + f"Recent related memories:\n{memory_texts}\n\n" + "Summary:" + ) + + try: + return self.llm.generate(prompt).strip() + except Exception as e: + logger.warning(f"Profile narrative generation failed: {e}") + return None + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + + 
def search_profiles( + self, + query: str, + user_id: Optional[str] = None, + limit: int = 10, + ) -> List[Dict[str, Any]]: + """Search profiles by name, facts, or semantic similarity.""" + all_profiles = self.db.get_all_profiles(user_id=user_id) + if not all_profiles: + return [] + + if self.embedder: + query_embedding = self.embedder.embed(query, memory_action="search") + scored = [] + for p in all_profiles: + p_emb = p.get("embedding") + if p_emb: + sim = _cosine_similarity(query_embedding, p_emb) + scored.append((p, sim)) + else: + # Keyword fallback + text = f"{p.get('name', '')} {' '.join(p.get('facts', []))} {' '.join(p.get('preferences', []))}".lower() + kw_score = sum(1 for w in query.lower().split() if w in text) * 0.1 + if kw_score > 0: + scored.append((p, kw_score)) + scored.sort(key=lambda x: x[1], reverse=True) + results = [] + for p, score in scored[:limit]: + p["search_score"] = round(score, 4) + results.append(p) + return results + else: + # Keyword-only search + query_lower = query.lower() + scored = [] + for p in all_profiles: + text = f"{p.get('name', '')} {' '.join(p.get('facts', []))} {' '.join(p.get('preferences', []))}".lower() + score = sum(1 for w in query_lower.split() if w in text) + if score > 0 or query_lower in p.get("name", "").lower(): + scored.append((p, score + (1 if query_lower in p.get("name", "").lower() else 0))) + scored.sort(key=lambda x: x[1], reverse=True) + return [p for p, _ in scored[:limit]] diff --git a/engram/core/scene.py b/engram/core/scene.py new file mode 100644 index 0000000..b213cc4 --- /dev/null +++ b/engram/core/scene.py @@ -0,0 +1,355 @@ +""" +Episodic Scene Processor for engram. + +Groups memories into coherent episodic scenes based on: +1. Time gaps - large temporal gaps signal new scenes +2. Topic shifts - cosine similarity drops signal topic changes +3. Location changes - detected location mentions changing + +Scenes get LLM-generated summaries and are searchable by semantic similarity. 
+""" + +from __future__ import annotations + +import logging +import re +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class SceneDetectionResult: + """Result of scene boundary detection.""" + is_new_scene: bool + reason: Optional[str] = None # "time_gap", "topic_shift", "location_change" + detected_location: Optional[str] = None + topic_similarity: Optional[float] = None + + +# Common location prepositions/patterns +_LOCATION_PATTERN = re.compile( + r'(?:at|in|from|near|visiting|located in|based in|went to|going to|arrived at)\s+' + r'([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)', + re.IGNORECASE, +) + + +def _cosine_similarity(a: List[float], b: List[float]) -> float: + """Compute cosine similarity between two vectors.""" + if not a or not b or len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b)) + norm_a = sum(x * x for x in a) ** 0.5 + norm_b = sum(x * x for x in b) ** 0.5 + if norm_a == 0 or norm_b == 0: + return 0.0 + return dot / (norm_a * norm_b) + + +def _detect_location(content: str) -> Optional[str]: + """Extract a location mention from text.""" + match = _LOCATION_PATTERN.search(content) + if match: + loc = match.group(1).strip() + if len(loc) > 2: + return loc + return None + + +class SceneProcessor: + """Manages episodic scene detection, creation, and summarization.""" + + def __init__( + self, + db, + embedder=None, + llm=None, + config: Optional[Dict[str, Any]] = None, + ): + self.db = db + self.embedder = embedder + self.llm = llm + cfg = config or {} + self.time_gap_minutes = cfg.get("scene_time_gap_minutes", 30) + self.topic_threshold = cfg.get("scene_topic_threshold", 0.55) + self.auto_close_minutes = cfg.get("auto_close_inactive_minutes", 120) + self.max_scene_memories = cfg.get("max_scene_memories", 50) + self.use_llm_summarization = cfg.get("use_llm_summarization", True) + 
self.summary_regen_threshold = cfg.get("summary_regenerate_threshold", 5) + + # ------------------------------------------------------------------ + # Boundary detection + # ------------------------------------------------------------------ + + def detect_boundary( + self, + content: str, + timestamp: str, + current_scene: Optional[Dict[str, Any]], + embedding: Optional[List[float]] = None, + ) -> SceneDetectionResult: + """Decide whether this memory starts a new scene or continues the current one.""" + + if current_scene is None: + return SceneDetectionResult(is_new_scene=True, reason="no_scene") + + # 1. Time gap + scene_end = current_scene.get("end_time") or current_scene.get("start_time") + if scene_end and timestamp: + try: + last_dt = datetime.fromisoformat(scene_end) + new_dt = datetime.fromisoformat(timestamp) + gap = (new_dt - last_dt).total_seconds() / 60.0 + if gap > self.time_gap_minutes: + return SceneDetectionResult(is_new_scene=True, reason="time_gap") + except (ValueError, TypeError): + pass + + # 2. Max memories + memory_ids = current_scene.get("memory_ids", []) + if len(memory_ids) >= self.max_scene_memories: + return SceneDetectionResult(is_new_scene=True, reason="max_memories") + + # 3. Topic shift (cosine similarity) + scene_embedding = current_scene.get("embedding") + if embedding and scene_embedding: + sim = _cosine_similarity(embedding, scene_embedding) + if sim < self.topic_threshold: + return SceneDetectionResult( + is_new_scene=True, + reason="topic_shift", + topic_similarity=sim, + ) + + # 4. 
Location change + scene_location = current_scene.get("location") + detected_location = _detect_location(content) + if ( + scene_location + and detected_location + and scene_location.lower() != detected_location.lower() + ): + return SceneDetectionResult( + is_new_scene=True, + reason="location_change", + detected_location=detected_location, + ) + + return SceneDetectionResult( + is_new_scene=False, + detected_location=detected_location, + topic_similarity=( + _cosine_similarity(embedding, scene_embedding) + if embedding and scene_embedding + else None + ), + ) + + # ------------------------------------------------------------------ + # Scene lifecycle + # ------------------------------------------------------------------ + + def create_scene( + self, + first_memory_id: str, + user_id: str, + timestamp: str, + topic: Optional[str] = None, + location: Optional[str] = None, + participants: Optional[List[str]] = None, + embedding: Optional[List[float]] = None, + ) -> Dict[str, Any]: + """Create a new scene and add the first memory to it.""" + scene_id = str(uuid.uuid4()) + scene_data = { + "id": scene_id, + "user_id": user_id, + "title": topic or "Untitled scene", + "topic": topic, + "location": location, + "participants": participants or [], + "memory_ids": [first_memory_id], + "start_time": timestamp, + "end_time": None, + "embedding": embedding, + "strength": 1.0, + } + self.db.add_scene(scene_data) + self.db.add_scene_memory(scene_id, first_memory_id, position=0) + try: + self.db.update_memory(first_memory_id, {"scene_id": scene_id}) + except Exception: + pass # scene_id column may not exist in very old DBs + return scene_data + + def add_memory_to_scene( + self, + scene_id: str, + memory_id: str, + embedding: Optional[List[float]] = None, + timestamp: Optional[str] = None, + ) -> None: + """Append a memory to an existing scene.""" + scene = self.db.get_scene(scene_id) + if not scene: + return + + memory_ids = scene.get("memory_ids", []) + position = 
len(memory_ids) + memory_ids.append(memory_id) + + updates: Dict[str, Any] = {"memory_ids": memory_ids} + if timestamp: + updates["end_time"] = timestamp + + # Running average of embeddings + if embedding and scene.get("embedding"): + old_emb = scene["embedding"] + n = max(position, 1) + updates["embedding"] = [ + (old_emb[i] * n + embedding[i]) / (n + 1) + for i in range(len(embedding)) + ] + + self.db.update_scene(scene_id, updates) + self.db.add_scene_memory(scene_id, memory_id, position=position) + try: + self.db.update_memory(memory_id, {"scene_id": scene_id}) + except Exception: + pass + + def close_scene(self, scene_id: str, timestamp: Optional[str] = None) -> None: + """Close a scene: set end_time and generate summary.""" + scene = self.db.get_scene(scene_id) + if not scene: + return + + updates: Dict[str, Any] = {} + if not scene.get("end_time"): + updates["end_time"] = timestamp or datetime.utcnow().isoformat() + + # Generate summary + if self.use_llm_summarization and self.llm: + memories = self.db.get_scene_memories(scene_id) + summary = self._summarize_scene(scene, memories) + if summary: + updates["summary"] = summary + # Derive title from summary + title = summary.split(".")[0][:120] + updates["title"] = title + + if updates: + self.db.update_scene(scene_id, updates) + + def auto_close_stale(self, user_id: str) -> List[str]: + """Close scenes that have been inactive beyond the auto-close threshold.""" + open_scene = self.db.get_open_scene(user_id) + if not open_scene: + return [] + + end_time = open_scene.get("end_time") or open_scene.get("start_time") + if not end_time: + return [] + + try: + last_dt = datetime.fromisoformat(end_time) + if datetime.utcnow() - last_dt > timedelta(minutes=self.auto_close_minutes): + self.close_scene(open_scene["id"]) + return [open_scene["id"]] + except (ValueError, TypeError): + pass + return [] + + # ------------------------------------------------------------------ + # Summarization + # 
------------------------------------------------------------------ + + def _summarize_scene( + self, scene: Dict[str, Any], memories: List[Dict[str, Any]] + ) -> Optional[str]: + """Generate an LLM summary of a scene's memories.""" + if not self.llm or not memories: + return None + + memory_texts = [m.get("memory", "") for m in memories if m.get("memory")] + if not memory_texts: + return None + + numbered = "\n".join(f"{i+1}. {t}" for i, t in enumerate(memory_texts[:20])) + prompt = ( + "Summarize the following sequence of memories into a concise episodic narrative " + "(2-4 sentences). Focus on what happened, who was involved, and key outcomes.\n\n" + f"Topic: {scene.get('topic', 'unknown')}\n" + f"Location: {scene.get('location', 'unknown')}\n\n" + f"Memories:\n{numbered}\n\n" + "Summary:" + ) + + try: + return self.llm.generate(prompt).strip() + except Exception as e: + logger.warning(f"Scene summarization failed: {e}") + return None + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + + def search_scenes( + self, + query: str, + user_id: Optional[str] = None, + limit: int = 10, + ) -> List[Dict[str, Any]]: + """Search scenes by matching query against summaries and topics.""" + all_scenes = self.db.get_scenes(user_id=user_id, limit=limit * 5) + if not all_scenes: + return [] + + if not self.embedder: + # Fallback: keyword match + query_lower = query.lower() + scored = [] + for s in all_scenes: + text = f"{s.get('title', '')} {s.get('summary', '')} {s.get('topic', '')}".lower() + score = sum(1 for w in query_lower.split() if w in text) + if score > 0: + scored.append((s, score)) + scored.sort(key=lambda x: x[1], reverse=True) + return [s for s, _ in scored[:limit]] + + query_embedding = self.embedder.embed(query, memory_action="search") + scored = [] + for s in all_scenes: + scene_emb = s.get("embedding") + if scene_emb: + sim = 
_cosine_similarity(query_embedding, scene_emb) + scored.append((s, sim)) + else: + # Fallback to text match + text = f"{s.get('title', '')} {s.get('summary', '')} {s.get('topic', '')}".lower() + keyword_score = sum(1 for w in query.lower().split() if w in text) * 0.1 + if keyword_score > 0: + scored.append((s, keyword_score)) + + scored.sort(key=lambda x: x[1], reverse=True) + results = [] + for s, score in scored[:limit]: + s["search_score"] = round(score, 4) + results.append(s) + return results + + def get_scene_timeline( + self, + user_id: str, + limit: int = 20, + ) -> List[Dict[str, Any]]: + """Get scenes in chronological order for timeline view.""" + scenes = self.db.get_scenes(user_id=user_id, limit=limit) + # Reverse to chronological (oldest first) + scenes.reverse() + return scenes diff --git a/engram/db/sqlite.py b/engram/db/sqlite.py index 77428ee..f602e8a 100644 --- a/engram/db/sqlite.py +++ b/engram/db/sqlite.py @@ -94,8 +94,74 @@ def _init_db(self) -> None: CREATE INDEX IF NOT EXISTS idx_category_type ON categories(category_type); CREATE INDEX IF NOT EXISTS idx_category_parent ON categories(parent_id); CREATE INDEX IF NOT EXISTS idx_category_strength ON categories(strength DESC); + + -- Episodic scenes + CREATE TABLE IF NOT EXISTS scenes ( + id TEXT PRIMARY KEY, + user_id TEXT, + title TEXT, + summary TEXT, + topic TEXT, + location TEXT, + participants TEXT DEFAULT '[]', + memory_ids TEXT DEFAULT '[]', + start_time TEXT, + end_time TEXT, + embedding TEXT, + strength REAL DEFAULT 1.0, + access_count INTEGER DEFAULT 0, + tombstone INTEGER DEFAULT 0 + ); + + CREATE INDEX IF NOT EXISTS idx_scene_user ON scenes(user_id); + CREATE INDEX IF NOT EXISTS idx_scene_start ON scenes(start_time DESC); + + -- Scene-Memory junction + CREATE TABLE IF NOT EXISTS scene_memories ( + scene_id TEXT NOT NULL, + memory_id TEXT NOT NULL, + position INTEGER DEFAULT 0, + PRIMARY KEY (scene_id, memory_id), + FOREIGN KEY (scene_id) REFERENCES scenes(id), + FOREIGN KEY (memory_id) 
REFERENCES memories(id) + ); + + -- Character profiles + CREATE TABLE IF NOT EXISTS profiles ( + id TEXT PRIMARY KEY, + user_id TEXT, + name TEXT NOT NULL, + profile_type TEXT DEFAULT 'contact' CHECK (profile_type IN ('self', 'contact', 'entity')), + narrative TEXT, + facts TEXT DEFAULT '[]', + preferences TEXT DEFAULT '[]', + relationships TEXT DEFAULT '[]', + sentiment TEXT, + theory_of_mind TEXT DEFAULT '{}', + aliases TEXT DEFAULT '[]', + embedding TEXT, + strength REAL DEFAULT 1.0, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP + ); + + CREATE INDEX IF NOT EXISTS idx_profile_user ON profiles(user_id); + CREATE INDEX IF NOT EXISTS idx_profile_name ON profiles(name); + CREATE INDEX IF NOT EXISTS idx_profile_type ON profiles(profile_type); + + -- Profile-Memory junction + CREATE TABLE IF NOT EXISTS profile_memories ( + profile_id TEXT NOT NULL, + memory_id TEXT NOT NULL, + role TEXT DEFAULT 'mentioned' CHECK (role IN ('subject', 'mentioned', 'about')), + PRIMARY KEY (profile_id, memory_id), + FOREIGN KEY (profile_id) REFERENCES profiles(id), + FOREIGN KEY (memory_id) REFERENCES memories(id) + ); """ ) + # Migration: add scene_id column to memories if missing + self._migrate_add_column("memories", "scene_id", "TEXT") @contextmanager def _get_connection(self): @@ -394,6 +460,271 @@ def _category_row_to_dict(self, row: sqlite3.Row) -> Dict[str, Any]: data["embedding"] = json.loads(data["embedding"]) return data + def _migrate_add_column(self, table: str, column: str, col_type: str) -> None: + """Add a column to an existing table if it doesn't already exist.""" + try: + with self._get_connection() as conn: + conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}") + except sqlite3.OperationalError: + pass # Column already exists + + # ========================================================================= + # Scene methods + # ========================================================================= + + def 
add_scene(self, scene_data: Dict[str, Any]) -> str: + scene_id = scene_data.get("id", str(uuid.uuid4())) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO scenes ( + id, user_id, title, summary, topic, location, + participants, memory_ids, start_time, end_time, + embedding, strength, access_count, tombstone + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + scene_id, + scene_data.get("user_id"), + scene_data.get("title"), + scene_data.get("summary"), + scene_data.get("topic"), + scene_data.get("location"), + json.dumps(scene_data.get("participants", [])), + json.dumps(scene_data.get("memory_ids", [])), + scene_data.get("start_time"), + scene_data.get("end_time"), + json.dumps(scene_data.get("embedding")) if scene_data.get("embedding") else None, + scene_data.get("strength", 1.0), + scene_data.get("access_count", 0), + 1 if scene_data.get("tombstone", False) else 0, + ), + ) + return scene_id + + def get_scene(self, scene_id: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM scenes WHERE id = ? AND tombstone = 0", (scene_id,) + ).fetchone() + if row: + return self._scene_row_to_dict(row) + return None + + def update_scene(self, scene_id: str, updates: Dict[str, Any]) -> bool: + set_clauses = [] + params: List[Any] = [] + for key, value in updates.items(): + if key in {"participants", "memory_ids", "embedding"}: + value = json.dumps(value) + set_clauses.append(f"{key} = ?") + params.append(value) + if not set_clauses: + return False + params.append(scene_id) + with self._get_connection() as conn: + conn.execute( + f"UPDATE scenes SET {', '.join(set_clauses)} WHERE id = ?", + params, + ) + return True + + def get_open_scene(self, user_id: str) -> Optional[Dict[str, Any]]: + """Get the most recent scene without an end_time for a user.""" + with self._get_connection() as conn: + row = conn.execute( + """ + SELECT * FROM scenes + WHERE user_id = ? 
AND end_time IS NULL AND tombstone = 0 + ORDER BY start_time DESC LIMIT 1 + """, + (user_id,), + ).fetchone() + if row: + return self._scene_row_to_dict(row) + return None + + def get_scenes( + self, + user_id: Optional[str] = None, + topic: Optional[str] = None, + start_after: Optional[str] = None, + start_before: Optional[str] = None, + limit: int = 50, + ) -> List[Dict[str, Any]]: + query = "SELECT * FROM scenes WHERE tombstone = 0" + params: List[Any] = [] + if user_id: + query += " AND user_id = ?" + params.append(user_id) + if topic: + query += " AND topic LIKE ?" + params.append(f"%{topic}%") + if start_after: + query += " AND start_time >= ?" + params.append(start_after) + if start_before: + query += " AND start_time <= ?" + params.append(start_before) + query += " ORDER BY start_time DESC LIMIT ?" + params.append(limit) + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + return [self._scene_row_to_dict(row) for row in rows] + + def add_scene_memory(self, scene_id: str, memory_id: str, position: int = 0) -> None: + with self._get_connection() as conn: + conn.execute( + "INSERT OR IGNORE INTO scene_memories (scene_id, memory_id, position) VALUES (?, ?, ?)", + (scene_id, memory_id, position), + ) + + def get_scene_memories(self, scene_id: str) -> List[Dict[str, Any]]: + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT m.* FROM memories m + JOIN scene_memories sm ON m.id = sm.memory_id + WHERE sm.scene_id = ? 
AND m.tombstone = 0 + ORDER BY sm.position + """, + (scene_id,), + ).fetchall() + return [self._row_to_dict(row) for row in rows] + + def _scene_row_to_dict(self, row: sqlite3.Row) -> Dict[str, Any]: + data = dict(row) + for key in ["participants", "memory_ids"]: + if key in data and data[key]: + data[key] = json.loads(data[key]) + else: + data[key] = [] + if data.get("embedding"): + data["embedding"] = json.loads(data["embedding"]) + data["tombstone"] = bool(data.get("tombstone", 0)) + return data + + # ========================================================================= + # Profile methods + # ========================================================================= + + def add_profile(self, profile_data: Dict[str, Any]) -> str: + profile_id = profile_data.get("id", str(uuid.uuid4())) + now = datetime.utcnow().isoformat() + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO profiles ( + id, user_id, name, profile_type, narrative, + facts, preferences, relationships, sentiment, + theory_of_mind, aliases, embedding, strength, + created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + profile_id, + profile_data.get("user_id"), + profile_data.get("name", ""), + profile_data.get("profile_type", "contact"), + profile_data.get("narrative"), + json.dumps(profile_data.get("facts", [])), + json.dumps(profile_data.get("preferences", [])), + json.dumps(profile_data.get("relationships", [])), + profile_data.get("sentiment"), + json.dumps(profile_data.get("theory_of_mind", {})), + json.dumps(profile_data.get("aliases", [])), + json.dumps(profile_data.get("embedding")) if profile_data.get("embedding") else None, + profile_data.get("strength", 1.0), + profile_data.get("created_at", now), + profile_data.get("updated_at", now), + ), + ) + return profile_id + + def get_profile(self, profile_id: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM profiles WHERE id = ?", (profile_id,) + ).fetchone() + if row: + return self._profile_row_to_dict(row) + return None + + def update_profile(self, profile_id: str, updates: Dict[str, Any]) -> bool: + set_clauses = [] + params: List[Any] = [] + for key, value in updates.items(): + if key in {"facts", "preferences", "relationships", "aliases", "theory_of_mind", "embedding"}: + value = json.dumps(value) + set_clauses.append(f"{key} = ?") + params.append(value) + set_clauses.append("updated_at = ?") + params.append(datetime.utcnow().isoformat()) + params.append(profile_id) + with self._get_connection() as conn: + conn.execute( + f"UPDATE profiles SET {', '.join(set_clauses)} WHERE id = ?", + params, + ) + return True + + def get_all_profiles(self, user_id: Optional[str] = None) -> List[Dict[str, Any]]: + query = "SELECT * FROM profiles" + params: List[Any] = [] + if user_id: + query += " WHERE user_id = ?" 
+ params.append(user_id) + query += " ORDER BY strength DESC" + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + return [self._profile_row_to_dict(row) for row in rows] + + def get_profile_by_name(self, name: str, user_id: Optional[str] = None) -> Optional[Dict[str, Any]]: + """Find a profile by exact name or alias match.""" + profiles = self.get_all_profiles(user_id=user_id) + name_lower = name.lower() + for p in profiles: + if p["name"].lower() == name_lower: + return p + if name_lower in [a.lower() for a in p.get("aliases", [])]: + return p + return None + + def add_profile_memory(self, profile_id: str, memory_id: str, role: str = "mentioned") -> None: + with self._get_connection() as conn: + conn.execute( + "INSERT OR IGNORE INTO profile_memories (profile_id, memory_id, role) VALUES (?, ?, ?)", + (profile_id, memory_id, role), + ) + + def get_profile_memories(self, profile_id: str) -> List[Dict[str, Any]]: + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT m.*, pm.role AS profile_role FROM memories m + JOIN profile_memories pm ON m.id = pm.memory_id + WHERE pm.profile_id = ? 
AND m.tombstone = 0 + ORDER BY m.created_at DESC + """, + (profile_id,), + ).fetchall() + return [self._row_to_dict(row) for row in rows] + + def _profile_row_to_dict(self, row: sqlite3.Row) -> Dict[str, Any]: + data = dict(row) + for key in ["facts", "preferences", "relationships", "aliases"]: + if key in data and data[key]: + data[key] = json.loads(data[key]) + else: + data[key] = [] + if data.get("theory_of_mind"): + data["theory_of_mind"] = json.loads(data["theory_of_mind"]) + else: + data["theory_of_mind"] = {} + if data.get("embedding"): + data["embedding"] = json.loads(data["embedding"]) + return data + def get_memories_by_category( self, category_id: str, diff --git a/engram/mcp_server.py b/engram/mcp_server.py index c1aa5bf..b8d508b 100644 --- a/engram/mcp_server.py +++ b/engram/mcp_server.py @@ -397,6 +397,122 @@ async def list_tools() -> List[Tool]: "required": ["content"] } ), + # ---- Episodic Scene tools ---- + Tool( + name="get_scene", + description="Get a specific episodic scene by ID. Returns the scene with its summary, topic, participants, and linked memory IDs.", + inputSchema={ + "type": "object", + "properties": { + "scene_id": { + "type": "string", + "description": "The ID of the scene to retrieve" + } + }, + "required": ["scene_id"] + } + ), + Tool( + name="list_scenes", + description="List episodic scenes chronologically. 
Filter by user, topic, or time range.", + inputSchema={ + "type": "object", + "properties": { + "user_id": { + "type": "string", + "description": "User identifier (default: 'default')" + }, + "topic": { + "type": "string", + "description": "Filter scenes containing this topic keyword" + }, + "start_after": { + "type": "string", + "description": "Only scenes starting after this ISO timestamp" + }, + "start_before": { + "type": "string", + "description": "Only scenes starting before this ISO timestamp" + }, + "limit": { + "type": "integer", + "description": "Maximum number of scenes to return (default: 20)" + } + } + } + ), + Tool( + name="search_scenes", + description="Semantic search over episodic scene summaries. Use to find past episodes by topic or content.", + inputSchema={ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query" + }, + "user_id": { + "type": "string", + "description": "User identifier (default: 'default')" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results (default: 10)" + } + }, + "required": ["query"] + } + ), + # ---- Character Profile tools ---- + Tool( + name="get_profile", + description="Get a character profile by ID. Returns facts, preferences, relationships, and narrative for a person or entity.", + inputSchema={ + "type": "object", + "properties": { + "profile_id": { + "type": "string", + "description": "The ID of the profile to retrieve" + } + }, + "required": ["profile_id"] + } + ), + Tool( + name="list_profiles", + description="List all character profiles for a user. Includes self-profile, contacts, and entities.", + inputSchema={ + "type": "object", + "properties": { + "user_id": { + "type": "string", + "description": "User identifier (default: 'default')" + } + } + } + ), + Tool( + name="search_profiles", + description="Search character profiles by name or description. 
Finds people and entities mentioned in memories.", + inputSchema={ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query (name, fact, or description)" + }, + "user_id": { + "type": "string", + "description": "User identifier (default: 'default')" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results (default: 10)" + } + }, + "required": ["query"] + } + ), ] @@ -560,6 +676,110 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: infer=False, ) + # ---- Scene tools ---- + elif name == "get_scene": + scene_id = arguments.get("scene_id", "") + scene = memory.get_scene(scene_id) + if scene: + scene.pop("embedding", None) + result = scene + else: + result = {"error": "Scene not found"} + + elif name == "list_scenes": + user_id = arguments.get("user_id", "default") + scenes = memory.get_scenes( + user_id=user_id, + topic=arguments.get("topic"), + start_after=arguments.get("start_after"), + start_before=arguments.get("start_before"), + limit=arguments.get("limit", 20), + ) + result = { + "scenes": [ + { + "id": s["id"], + "title": s.get("title"), + "topic": s.get("topic"), + "summary": s.get("summary"), + "start_time": s.get("start_time"), + "end_time": s.get("end_time"), + "memory_count": len(s.get("memory_ids", [])), + } + for s in scenes + ], + "total": len(scenes), + } + + elif name == "search_scenes": + query = arguments.get("query", "") + user_id = arguments.get("user_id", "default") + limit = arguments.get("limit", 10) + scenes = memory.search_scenes(query=query, user_id=user_id, limit=limit) + result = { + "scenes": [ + { + "id": s["id"], + "title": s.get("title"), + "summary": s.get("summary"), + "topic": s.get("topic"), + "start_time": s.get("start_time"), + "search_score": s.get("search_score"), + "memory_count": len(s.get("memory_ids", [])), + } + for s in scenes + ], + "total": len(scenes), + } + + # ---- Profile tools ---- + elif name == 
"get_profile": + profile_id = arguments.get("profile_id", "") + profile = memory.get_profile(profile_id) + if profile: + profile.pop("embedding", None) + result = profile + else: + result = {"error": "Profile not found"} + + elif name == "list_profiles": + user_id = arguments.get("user_id", "default") + profiles = memory.get_all_profiles(user_id=user_id) + result = { + "profiles": [ + { + "id": p["id"], + "name": p.get("name"), + "profile_type": p.get("profile_type"), + "narrative": p.get("narrative"), + "fact_count": len(p.get("facts", [])), + "preference_count": len(p.get("preferences", [])), + } + for p in profiles + ], + "total": len(profiles), + } + + elif name == "search_profiles": + query = arguments.get("query", "") + user_id = arguments.get("user_id", "default") + limit = arguments.get("limit", 10) + profiles = memory.search_profiles(query=query, user_id=user_id, limit=limit) + result = { + "profiles": [ + { + "id": p["id"], + "name": p.get("name"), + "profile_type": p.get("profile_type"), + "narrative": p.get("narrative"), + "facts": p.get("facts", [])[:5], + "search_score": p.get("search_score"), + } + for p in profiles + ], + "total": len(profiles), + } + else: result = {"error": f"Unknown tool: {name}"} diff --git a/engram/memory/main.py b/engram/memory/main.py index b2f4021..a571045 100644 --- a/engram/memory/main.py +++ b/engram/memory/main.py @@ -21,6 +21,8 @@ from engram.core.retrieval import composite_score, tokenize, HybridSearcher from engram.core.category import CategoryProcessor, CategoryMatch from engram.core.graph import KnowledgeGraph +from engram.core.scene import SceneProcessor +from engram.core.profile import ProfileProcessor from engram.db.sqlite import SQLiteManager from engram.exceptions import FadeMemValidationError from engram.memory.base import MemoryBase @@ -128,6 +130,43 @@ def __init__(self, config: Optional[MemoryConfig] = None): else: self.knowledge_graph = None + # Initialize SceneProcessor + self.scene_config = 
self.config.scene + if self.scene_config.enable_scenes: + self.scene_processor = SceneProcessor( + db=self.db, + embedder=self.embedder, + llm=self.llm, + config={ + "scene_time_gap_minutes": self.scene_config.scene_time_gap_minutes, + "scene_topic_threshold": self.scene_config.scene_topic_threshold, + "auto_close_inactive_minutes": self.scene_config.auto_close_inactive_minutes, + "max_scene_memories": self.scene_config.max_scene_memories, + "use_llm_summarization": self.scene_config.use_llm_summarization, + "summary_regenerate_threshold": self.scene_config.summary_regenerate_threshold, + }, + ) + else: + self.scene_processor = None + + # Initialize ProfileProcessor + self.profile_config = self.config.profile + if self.profile_config.enable_profiles: + self.profile_processor = ProfileProcessor( + db=self.db, + embedder=self.embedder, + llm=self.llm, + config={ + "auto_detect_profiles": self.profile_config.auto_detect_profiles, + "use_llm_extraction": self.profile_config.use_llm_extraction, + "narrative_regenerate_threshold": self.profile_config.narrative_regenerate_threshold, + "self_profile_auto_create": self.profile_config.self_profile_auto_create, + "max_facts_per_profile": self.profile_config.max_facts_per_profile, + }, + ) + else: + self.profile_processor = None + @classmethod def from_config(cls, config_dict: Dict[str, Any]): return cls(MemoryConfig(**config_dict)) @@ -453,6 +492,20 @@ def add( if self.graph_config.auto_link_entities: self.knowledge_graph.link_by_shared_entities(memory_id) + # SceneProcessor: Assign memory to a scene + if self.scene_processor: + try: + self._assign_to_scene(memory_id, content, embedding, user_id, now) + except Exception as e: + logger.warning(f"Scene assignment failed for {memory_id}: {e}") + + # ProfileProcessor: Update profiles from content + if self.profile_processor: + try: + self._update_profiles(memory_id, content, mem_metadata, user_id) + except Exception as e: + logger.warning(f"Profile update failed for {memory_id}: 
{e}") + results.append( { "id": memory_id, @@ -1763,3 +1816,147 @@ def get_graph_stats(self) -> Dict[str, Any]: stats = self.knowledge_graph.stats() stats["enabled"] = True return stats + + # ========================================================================= + # Scene Methods + # ========================================================================= + + def _assign_to_scene( + self, + memory_id: str, + content: str, + embedding: Optional[List[float]], + user_id: Optional[str], + timestamp: str, + ) -> None: + """Assign a memory to an existing or new scene.""" + if not self.scene_processor or not user_id: + return + + # Auto-close stale scenes first + self.scene_processor.auto_close_stale(user_id) + + current_scene = self.db.get_open_scene(user_id) + detection = self.scene_processor.detect_boundary( + content=content, + timestamp=timestamp, + current_scene=current_scene, + embedding=embedding, + ) + + if detection.is_new_scene: + # Close old scene if open + if current_scene: + self.scene_processor.close_scene(current_scene["id"], timestamp) + + # Detect topic from content (first 60 chars as fallback) + topic = content[:60].strip() + location = detection.detected_location + + self.scene_processor.create_scene( + first_memory_id=memory_id, + user_id=user_id, + timestamp=timestamp, + topic=topic, + location=location, + embedding=embedding, + ) + else: + if current_scene: + self.scene_processor.add_memory_to_scene( + scene_id=current_scene["id"], + memory_id=memory_id, + embedding=embedding, + timestamp=timestamp, + ) + + def _update_profiles( + self, + memory_id: str, + content: str, + metadata: Dict[str, Any], + user_id: Optional[str], + ) -> None: + """Extract and apply profile updates from memory content.""" + if not self.profile_processor or not user_id: + return + + updates = self.profile_processor.extract_profile_mentions( + content=content, + metadata=metadata, + user_id=user_id, + ) + + for update in updates: + self.profile_processor.apply_update( + 
profile_update=update, + memory_id=memory_id, + user_id=user_id, + ) + + def get_scene(self, scene_id: str) -> Optional[Dict[str, Any]]: + """Get a specific scene by ID.""" + return self.db.get_scene(scene_id) + + def get_scenes( + self, + user_id: Optional[str] = None, + topic: Optional[str] = None, + start_after: Optional[str] = None, + start_before: Optional[str] = None, + limit: int = 50, + ) -> List[Dict[str, Any]]: + """List scenes chronologically.""" + return self.db.get_scenes( + user_id=user_id, + topic=topic, + start_after=start_after, + start_before=start_before, + limit=limit, + ) + + def search_scenes(self, query: str, user_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]: + """Semantic search over scene summaries.""" + if not self.scene_processor: + return [] + return self.scene_processor.search_scenes(query=query, user_id=user_id, limit=limit) + + def get_scene_timeline(self, user_id: str, limit: int = 20) -> List[Dict[str, Any]]: + """Get scenes in chronological order.""" + if not self.scene_processor: + return [] + return self.scene_processor.get_scene_timeline(user_id=user_id, limit=limit) + + def get_scene_memories(self, scene_id: str) -> List[Dict[str, Any]]: + """Get all memories in a scene.""" + return self.db.get_scene_memories(scene_id) + + # ========================================================================= + # Profile Methods + # ========================================================================= + + def get_profile(self, profile_id: str) -> Optional[Dict[str, Any]]: + """Get a character profile by ID.""" + return self.db.get_profile(profile_id) + + def get_all_profiles(self, user_id: Optional[str] = None) -> List[Dict[str, Any]]: + """List all profiles for a user.""" + return self.db.get_all_profiles(user_id=user_id) + + def get_self_profile(self, user_id: str) -> Optional[Dict[str, Any]]: + """Get the self-profile for a user.""" + return self.db.get_profile_by_name("self", user_id=user_id) + + def 
search_profiles(self, query: str, user_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]: + """Search profiles by name or description.""" + if not self.profile_processor: + return [] + return self.profile_processor.search_profiles(query=query, user_id=user_id, limit=limit) + + def update_profile(self, profile_id: str, updates: Dict[str, Any]) -> bool: + """Update a profile.""" + return self.db.update_profile(profile_id, updates) + + def get_profile_memories(self, profile_id: str) -> List[Dict[str, Any]]: + """Get memories linked to a profile.""" + return self.db.get_profile_memories(profile_id) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 7b0f79f37c74ec973d8568305299aa9931f2a4d5 Mon Sep 17 00:00:00 2001 From: Ashish-dwi99 Date: Mon, 9 Feb 2026 19:48:59 +0530 Subject: [PATCH 2/2] feat: v0.4.0 product-ready release - Version bump to 0.4.0 with Beta classifier - CLI: add `engram serve` alias, `engram status` command, dynamic --version - Docker support: Dockerfile, docker-compose.yml, .dockerignore - GitHub Actions CI: test workflow (Python 3.9, 3.11, 3.12) - README rewritten as product-focused documentation - CHANGELOG.md with all versions - PMK v2: staged writes, policy gateway, dual retrieval, namespaces, agent trust - REST API overhaul: session tokens, capability scoping, sleep-cycle maintenance - Dashboard visualizer, episodic store, context packer, reranker Co-Authored-By: Claude Opus 4.6 --- .dockerignore | 16 + .github/workflows/test.yml | 28 + CHANGELOG.md | 53 + Dockerfile | 9 + README.md | 802 ++------ docker-compose.yml | 15 + engram/__init__.py | 2 +- engram/api/__init__.py | 3 +- engram/api/app.py | 750 ++++++++ engram/api/auth.py | 76 + engram/api/schemas.py | 105 ++ engram/api/server.py | 435 +---- engram/api/static/dashboard.html | 736 ++++++++ engram/core/invariants.py | 110 ++ engram/core/kernel.py | 1346 +++++++++++++ engram/core/policy.py | 148 ++ engram/core/provenance.py | 40 
+ engram/core/scene.py | 5 + engram/db/sqlite.py | 1670 ++++++++++++++++- engram/decay/__init__.py | 5 + engram/decay/refcounts.py | 45 + engram/integrations/claude_code_plugin.py | 12 + engram/integrations/openclaw.py | 30 +- engram/main_cli.py | 70 +- engram/mcp_server.py | 494 ++++- engram/memory/client.py | 201 +- engram/memory/episodic_store.py | 290 +++ engram/memory/main.py | 395 +++- engram/memory/staging_store.py | 97 + engram/observability.py | 43 + engram/retrieval/__init__.py | 5 + engram/retrieval/context_packer.py | 62 + engram/retrieval/dual_search.py | 136 ++ engram/retrieval/reranker.py | 37 + plugins/engram-memory/hooks/prompt_context.py | 153 +- pyproject.toml | 10 +- 36 files changed, 7216 insertions(+), 1218 deletions(-) create mode 100644 .dockerignore create mode 100644 .github/workflows/test.yml create mode 100644 CHANGELOG.md create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 engram/api/app.py create mode 100644 engram/api/auth.py create mode 100644 engram/api/schemas.py create mode 100644 engram/api/static/dashboard.html create mode 100644 engram/core/invariants.py create mode 100644 engram/core/kernel.py create mode 100644 engram/core/policy.py create mode 100644 engram/core/provenance.py create mode 100644 engram/decay/__init__.py create mode 100644 engram/decay/refcounts.py create mode 100644 engram/memory/episodic_store.py create mode 100644 engram/memory/staging_store.py create mode 100644 engram/retrieval/__init__.py create mode 100644 engram/retrieval/context_packer.py create mode 100644 engram/retrieval/dual_search.py create mode 100644 engram/retrieval/reranker.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..577c2c3 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,16 @@ +.venv +.git +__pycache__ +tests +.claude +*.db +*.db-journal +*.db-wal +*.db-shm +node_modules +dist +.pytest_cache +.eggs +*.egg-info +.env +.env.local diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml new file mode 100644 index 0000000..d5cd856 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,28 @@ +name: Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install -e ".[dev]" + + - name: Run tests + run: pytest tests/ -v diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f95bfc3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,53 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project adheres to [Semantic Versioning](https://semver.org/). + +## [0.4.0] - 2025-02-09 + +### Added +- Docker support (Dockerfile + docker-compose.yml) +- GitHub Actions CI workflow (Python 3.9, 3.11, 3.12) +- `engram serve` command (alias for `server`) +- `engram status` command (version, config paths, DB stats) +- Landing page waitlist section for hosted cloud +- CHANGELOG.md + +### Changed +- Version bump to 0.4.0 +- README rewritten as product-focused documentation +- CLI `--version` now pulls from `engram.__version__` +- pyproject.toml updated with Beta classifier and new keywords + +## [0.3.0] - 2025-01-15 + +### Added +- PMK v2: staged writes with policy gateway +- Dual retrieval (semantic + episodic) +- Namespace and agent trust system +- Session tokens with capability scoping +- Sleep-cycle background maintenance +- Reference-aware decay (preserve strongly referenced memories) + +## [0.2.0] - 2025-01-01 + +### Added +- Episodic scenes (CAST grouping with time gap and topic shift detection) +- Character profiles (extraction, self-profile, narrative 
generation) +- Dashboard visualizer +- Claude Code plugin (hooks, commands, skill) +- OpenClaw integration + +## [0.1.0] - 2024-12-01 + +### Added +- FadeMem: dual-layer memory (SML/LML) with Ebbinghaus decay +- EchoMem: multi-modal encoding (keywords, paraphrases, implications, questions) +- CategoryMem: dynamic hierarchical category organization +- MCP server for Claude Code, Cursor, Codex +- REST API server +- Knowledge graph with entity extraction and linking +- Hybrid search (semantic + keyword) +- CLI with add, search, list, stats, decay, export, import commands +- Ollama support for local LLMs diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2dc31b9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.11-slim +WORKDIR /app +COPY pyproject.toml README.md ./ +COPY engram/ engram/ +RUN pip install --no-cache-dir ".[api]" +EXPOSE 8100 +ENV ENGRAM_DATA_DIR=/data +VOLUME /data +CMD ["engram-api", "--host", "0.0.0.0", "--port", "8100"] diff --git a/README.md b/README.md index 58b5713..f787876 100644 --- a/README.md +++ b/README.md @@ -4,776 +4,229 @@
-

- The Memory Layer for AI Agent Orchestrators -

-

- Give your agents persistent memory that learns, forgets, and shares knowledge like humans do. -

- Native MCP integration for Claude Code, Cursor, and OpenAI Codex.
- Bio-inspired architecture: memories strengthen with use, fade when irrelevant.
- Multi-agent knowledge sharing with user and agent scoping. + Memory layer for AI agents with biologically-inspired forgetting.

- Quick Start • - Why Engram • - Multi-Agent • - Claude Code, Cursor & Codex • - API + MIT License + Tests + Python 3.9+

---- - -## Why Engram? - -| Feature | Other Memory Layers | **Engram** | -|---------|---------------------|------------| -| Bio-inspired forgetting | No | **Ebbinghaus decay** | -| Multi-modal encoding | No | **5 modes (echo)** | -| Knowledge graph | Sometimes | **Entity linking** | -| Dynamic categories | Rare | **Auto-discovered** | -| Category decay | No | **Bio-inspired** | -| Hybrid search | Vector only | **Semantic + Keyword** | -| Storage efficiency | Store everything | **~45% less** | -| MCP Server | Rare | **Claude/Cursor/Codex** | -| Local LLMs (Ollama) | Sometimes | **Yes** | -| Self-hosted | Cloud-first | **Local-first** | - -**Engram is different.** While other memory layers store everything forever, Engram uses bio-inspired mechanisms: - -- **Memories fade** when not accessed (Ebbinghaus decay curve) -- **Important memories strengthen** through repeated access and get promoted to long-term storage -- **Echo encoding** creates multiple retrieval paths (keywords, paraphrases, implications) -- **Dynamic categories** emerge from content and evolve over time -- **Knowledge graph** links memories by shared entities for relationship reasoning -- **Hybrid search** combines semantic similarity with keyword matching - -The result: **better retrieval precision, lower storage costs, and memories that actually matter.** +

+ Quick Start · + What is Engram · + Features · + Integrations · + API +

--- ## Quick Start -### Installation - -```bash -# Clone the repository -git clone https://github.com/Ashish-dwi99/Engram.git -cd Engram - -# Install with all dependencies -pip install -e ".[all]" - -# Set your API key -export GEMINI_API_KEY="your-key" # or OPENAI_API_KEY -``` - -Or install directly from GitHub: ```bash -pip install "engram[all] @ git+https://github.com/Ashish-dwi99/Engram.git" +pip install -e ".[all]" # 1. Install +export GEMINI_API_KEY="..." # 2. Set API key (or OPENAI_API_KEY) +engram install # 3. Configure Claude Code, Cursor, Codex ``` -### Usage - -**Python SDK:** - -```python -from engram import Engram - -memory = Engram() -memory.add("User prefers Python over JavaScript", user_id="u123") -results = memory.search("programming preferences", user_id="u123") -``` - -**Claude Code / Cursor / Codex Integration:** - -For the full setup with MCP tools, proactive hooks, and slash commands, see [Claude Code, Cursor & Codex Setup](#claude-code-cursor--codex-setup) below. +Done. Your agents now have persistent memory. --- -## Multi-Agent Memory - -Engram is designed for agent orchestrators. 
Every memory is scoped by `user_id` and optionally `agent_id`, enabling: - -### Knowledge Isolation - -```python -# Agent 1 stores knowledge -memory.add("Project deadline is Friday", user_id="project_x", agent_id="planner") - -# Agent 2 stores different knowledge -memory.add("Budget is $50k", user_id="project_x", agent_id="analyst") +## What is Engram -# Search across all agents for a user -all_results = memory.search("project details", user_id="project_x") - -# Search only one agent's knowledge -planner_results = memory.search("deadlines", user_id="project_x", agent_id="planner") -``` - -### Cross-Agent Knowledge Sharing - -```python -# Researcher agent discovers information -memory.add( - "The API rate limit is 100 req/min", - user_id="team_alpha", - agent_id="researcher", - categories=["technical", "api"] -) - -# Coder agent can access shared knowledge -results = memory.search("rate limits", user_id="team_alpha") -# Returns the researcher's finding -``` +Engram is a memory layer for AI agents. It stores knowledge, forgets what doesn't matter, and strengthens what does — using mechanisms inspired by how biological memory works. It plugs into Claude Code, Cursor, and Codex via MCP, or into any application via REST API and Python SDK. -### Memory Layers for Different Retention +**100% free, forever. 
Bring your own API key (Gemini, OpenAI, or Ollama).** -```python -# Short-term (SML): Fast decay, recent context -# Long-term (LML): Slow decay, important facts - -# Get only long-term memories -important = memory.get_all(user_id="u123", layer="lml") - -# Memories auto-promote based on access patterns -# Or manually promote critical information -memory.promote(memory_id="abc123") -``` +--- -### Agent-Specific Statistics +## Key Features -```python -stats = memory.stats(user_id="project_x", agent_id="planner") -# { -# "total": 42, -# "sml_count": 30, -# "lml_count": 12, -# "avg_strength": 0.73, -# "categories": ["deadlines", "tasks", "dependencies"] -# } -``` +- **FadeMem** — Dual-layer memory (short-term / long-term) with Ebbinghaus decay. Memories fade when unused, strengthen when accessed, and promote automatically. +- **EchoMem** — Multi-modal encoding creates multiple retrieval paths (keywords, paraphrases, implications, question forms) for better recall. +- **CategoryMem** — Dynamic hierarchical categories emerge from content and evolve over time. Categories decay too. +- **Scenes** — Episodic memory groups interactions into narrative scenes with time gap and topic shift detection. +- **Profiles** — Character profile extraction tracks entities across conversations. +- **Knowledge Graph** — Entity extraction and linking for relationship reasoning across memories. +- **MCP Server** — Native Model Context Protocol integration for Claude Code, Cursor, and Codex. +- **REST API** — Language-agnostic HTTP API with session tokens, staged writes, and namespace scoping. +- **Hybrid Search** — Combines semantic similarity with keyword matching for better precision. +- **Multi-Agent** — Scoped by user and agent. Agents share knowledge or isolate it. +- **~45% Storage Reduction** — Compared to store-everything approaches. 
--- -## Claude Code, Cursor & Codex Setup - -Engram provides a native MCP (Model Context Protocol) server for seamless integration with Claude Code, Cursor, and OpenAI Codex. - -### Step-by-Step Setup +## Installation -**1. Install Engram** (in your terminal, not Claude Code): +### pip (recommended) ```bash -git clone https://github.com/Ashish-dwi99/Engram.git -cd Engram -python3 -m venv .venv -source .venv/bin/activate # Windows: .venv\Scripts\activate pip install -e ".[all]" ``` -**2. Set your API key** (pick one): +### Docker ```bash -export GEMINI_API_KEY="your-key-here" -# OR -export OPENAI_API_KEY="your-key-here" +docker compose up -d +# API available at http://localhost:8100 ``` -**3. Run the installer** (still in your terminal): +### From source ```bash -engram-install +git clone https://github.com/Ashish-dwi99/Engram.git +cd Engram +python3 -m venv .venv && source .venv/bin/activate +pip install -e ".[all]" ``` -**What this does automatically:** -- ✓ Writes MCP server config to `~/.claude.json` (Claude Code) -- ✓ Writes MCP server config to `~/Library/Application Support/Claude/claude_desktop_config.json` (Claude Desktop) -- ✓ Writes MCP server config to `~/.cursor/mcp.json` (Cursor) -- ✓ Writes MCP server config to `~/.codex/config.toml` (Codex) -- ✓ Deploys plugin files to `~/.engram/claude-plugin/engram-memory/` -- ✓ Forwards your API keys to all configs - -**What it does NOT do:** -- ✗ Does not start the Engram API (you need to run `engram-api` separately) -- ✗ Does not activate the Claude Code plugin (requires `/plugin install` command) -- ✗ Does not restart Claude Code (you need to restart manually) - -**4. Restart Claude Code** so it loads the new MCP config. - -**5. Start the Engram API** (in a separate terminal, leave it running): +Set one API key: ```bash -engram-api -# Runs on http://127.0.0.1:8100 -``` - -> **Note:** The API is required for the proactive hook to work. 
If the API is down, the hook exits silently and Claude Code continues normally (just without auto-injected context). - -**6. Activate the plugin** (inside Claude Code): - -``` -/plugin install engram-memory --path ~/.engram/claude-plugin -``` - -**Done!** The 10 MCP tools are now available, the proactive hook is active, and you can use `/engram:remember`, `/engram:search`, etc. - ---- - -### What You Get After Setup - -| Feature | Available After Step | -|---|---| -| 10 MCP tools (`add_memory`, `search_memory`, etc.) | Step 4 (restart Claude Code) | -| Proactive memory injection (hook) | Step 6 (plugin activated) + API running | -| `/engram:*` slash commands | Step 6 (plugin activated) | -| Skill (standing instructions) | Step 6 (plugin activated) | - -If you only want the MCP tools (no proactive hook), stop after step 4. Steps 5-6 are only for the plugin features. - ---- - -### Troubleshooting - -**"Claude Code doesn't see the memory tools"** -- Restart Claude Code after running `engram-install` -- Check that `~/.claude.json` exists and has an `mcpServers.engram-memory` section -- Verify your API key is set: `echo $GEMINI_API_KEY` - -**"The hook isn't injecting memories"** -- Check that `engram-api` is running: `curl http://127.0.0.1:8100/health` -- Verify the plugin is activated: in Claude Code, run `/plugin` and check that `engram-memory` appears in the list -- Check the hook script is executable: `ls -l ~/.engram/claude-plugin/engram-memory/hooks/prompt_context.py` (should show `x` permission) - -**"Plugin activation failed"** -- Verify plugin files exist: `ls ~/.engram/claude-plugin/engram-memory/` -- If missing, run `engram-install` again -- Make sure you're using the full path: `/plugin install engram-memory --path ~/.engram/claude-plugin` - -**"API won't start (port already in use)"** -- Check if another instance is running: `lsof -i :8100` -- Kill it: `kill <PID>` then restart `engram-api` -- Or use a different port: `ENGRAM_API_PORT=8200 engram-api` - ---- - 
-### Manual Configuration - -#### Claude Code / Claude Desktop - -Add to `~/.claude.json` (CLI) or `claude_desktop_config.json` (Desktop): - -```json -{ - "mcpServers": { - "engram-memory": { - "command": "python", - "args": ["-m", "engram.mcp_server"], - "env": { - "GEMINI_API_KEY": "your-api-key" - } - } - } -} -``` - -#### Cursor - -Add to `~/.cursor/mcp.json`: - -```json -{ - "mcpServers": { - "engram-memory": { - "command": "python", - "args": ["-m", "engram.mcp_server"], - "env": { - "GEMINI_API_KEY": "your-api-key" - } - } - } -} -``` - -> **Note:** If the file doesn't exist, create it. You can also configure MCP servers through Cursor's Settings UI under the MCP section. - -#### OpenAI Codex - -Add to `~/.codex/config.toml`: - -```toml -[mcp_servers.engram-memory] -command = "python" -args = ["-m", "engram.mcp_server"] - -[mcp_servers.engram-memory.env] -GEMINI_API_KEY = "your-api-key" -``` - -### Claude Code Plugin (Proactive Memory) - -The MCP tools above let Claude *react* to your requests. The **Claude Code plugin** makes memory *proactive* — relevant context is injected automatically before Claude even sees your message. - -`engram-install` deploys the plugin to `~/.engram/claude-plugin/engram-memory/`. To activate it inside Claude Code, run: - -``` -/plugin install engram-memory --path ~/.engram/claude-plugin -``` - -> **Requires a running Engram API** (`engram-api`) for the hook to fetch memories. If the API is down the hook exits silently — nothing breaks, you just don't get the auto-injection. - -#### What the plugin adds - -| Piece | What it does | -|---|---| -| **UserPromptSubmit hook** | Before each reply, queries Engram and injects matching memories into Claude's context. Stdlib-only script, no extra deps. 
| -| `/engram:remember <text>` | Save a fact or preference on the spot | -| `/engram:search <query>` | Search memories by topic | -| `/engram:forget <query>` | Delete a memory (confirms before removing) | -| `/engram:status` | Show memory-store stats at a glance | -| **Skill (standing instructions)** | Tells Claude when to save, when to search, and how to surface injected context naturally | - -#### How the hook works - -``` -User types a message - → hook reads it, extracts a short query (no LLM, pure string ops) - → GET /health (3 s timeout — fast-fail if API is down) - → POST /v1/search (6 s timeout) - → matching memories injected as a system message - → Claude replies with that context already loaded +export GEMINI_API_KEY="your-key" # Gemini (default) +# or OPENAI_API_KEY for OpenAI +# or OLLAMA_HOST for local Ollama (no key needed) ``` -Total added latency is typically under 2 seconds, well within the 8-second hook timeout. On any failure the hook outputs `{}` and Claude proceeds normally. - --- -### Available MCP Tools - -Once configured, your agent has access to these tools: +## Usage -| Tool | Description | Example Use | -|------|-------------|-------------| -| `add_memory` | Store a new memory | "Remember that the user prefers dark mode" | -| `search_memory` | Find relevant memories | "What are the user's UI preferences?" 
| -| `get_all_memories` | List all stored memories | "Show me everything I know about this user" | -| `get_memory` | Get a specific memory by ID | Retrieve exact memory content | -| `update_memory` | Update memory content | Correct outdated information | -| `delete_memory` | Remove a memory | Remove sensitive or incorrect data | -| `get_memory_stats` | Get storage statistics | Monitor memory health | -| `apply_memory_decay` | Run forgetting algorithm | Periodic cleanup of stale memories | -| `engram_context` | Load session digest from prior sessions | Call once at conversation start; returns top memories, LML first | -| `remember` | Quick-save a fact or preference | Stores directly with `source_app=claude-code`, no LLM extraction | +### MCP Tools -### Example: Claude Code with Memory +After running `engram install`, your agent gets 14 MCP tools including: -**Without the plugin** — Claude reacts to explicit requests via MCP tools: +| Tool | Description | +|------|-------------| +| `add_memory` | Store a new memory | +| `search_memory` | Semantic + keyword search | +| `get_all_memories` | List stored memories | +| `update_memory` / `delete_memory` | Modify or remove | +| `apply_memory_decay` | Run forgetting algorithm | +| `engram_context` | Load session digest from prior sessions | +| `remember` | Quick-save a fact (no LLM extraction) | +| `search_scenes` / `get_scene` | Episodic scene retrieval | -``` -You: Remember that I prefer using TypeScript for all new projects - -Claude: I'll remember that preference for you. 
-[Calls remember tool → stored with source_app=claude-code] -``` - -**With the plugin** — memory is proactive and invisible: +### CLI +```bash +engram add "User prefers Python" -u user123 +engram search "programming" -u user123 +engram list -u user123 +engram stats +engram status # Version, config paths, DB stats +engram serve # Start REST API +engram decay # Apply forgetting +engram export -o memories.json +engram import memories.json ``` ---- Session A --- -You: /engram:remember I prefer TypeScript for all new projects -Claude: Saved to memory. ---- Session B (new conversation, no history) --- -You: What stack should I use for the new API? +### Python SDK -[Hook runs silently: queries Engram, injects "TypeScript preference" into context] +```python +from engram import Engram -Claude: Based on your preferences, I'd recommend TypeScript... - (no search_memory call needed — context was already there) +memory = Engram() +memory.add("User prefers Python", user_id="u123") +results = memory.search("programming preferences", user_id="u123") ``` -### Example: Multi-Agent Codex Workflow +Full interface with `Memory` class: ```python -# Agent 1: Research Agent -memory.add( - "The target API uses OAuth 2.0 with JWT tokens", - user_id="project_123", - agent_id="researcher" -) - -# Agent 2: Implementation Agent searches shared knowledge -results = memory.search("authentication method", user_id="project_123") -# Finds: "OAuth 2.0 with JWT tokens" - -# Agent 3: Review Agent adds findings -memory.add( - "Security review passed for OAuth implementation", - user_id="project_123", - agent_id="reviewer" -) -``` - ---- +from engram import Memory -## REST API +memory = Memory() +memory.add(content, user_id, agent_id=None, categories=None, metadata=None) +memory.search(query, user_id, limit=10) +memory.get(memory_id) +memory.update(memory_id, content) +memory.delete(memory_id) +memory.promote(memory_id) # SML -> LML +memory.history(memory_id) +memory.get_related_memories(memory_id) # 
Knowledge graph +``` -Start the HTTP API server for language-agnostic integration: +### REST API ```bash engram-api # Starts on http://127.0.0.1:8100 ``` -### Endpoints - ```bash # Add memory curl -X POST http://localhost:8100/v1/memories \ -H "Content-Type: application/json" \ - -d '{"content": "User prefers dark mode", "user_id": "u123", "agent_id": "ui_agent"}' + -d '{"content": "User prefers dark mode", "user_id": "u123"}' -# Search memories +# Search curl -X POST http://localhost:8100/v1/search \ -H "Content-Type: application/json" \ -d '{"query": "UI preferences", "user_id": "u123"}' -# Get all memories -curl "http://localhost:8100/v1/memories?user_id=u123" - -# Get statistics +# Stats curl "http://localhost:8100/v1/stats?user_id=u123" - -# Apply decay (forgetting) -curl -X POST http://localhost:8100/v1/decay \ - -H "Content-Type: application/json" \ - -d '{"user_id": "u123"}' - -# Get categories -curl "http://localhost:8100/v1/categories?user_id=u123" ``` -API documentation: http://localhost:8100/docs +Full API docs at http://localhost:8100/docs --- -## How It Works +## Integrations -Engram combines three bio-inspired memory systems: - -### FadeMem: Decay & Consolidation - -``` -┌─────────────────────────────────────────────────────────┐ -│ Memory Lifecycle │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ New Memory → Short-term (SML) │ -│ │ │ -│ │ Accessed frequently? 
│ -│ ▼ │ -│ ┌─────────┐ │ -│ No ← │ Decay │ → Yes │ -│ └─────────┘ │ -│ │ │ │ -│ ▼ ▼ │ -│ Forgotten Promoted to Long-term (LML) │ -│ │ -└─────────────────────────────────────────────────────────┘ -``` - -- **Adaptive Decay**: Memories fade based on time and access patterns -- **Dual-Layer Architecture**: Short-term (fast decay) → Long-term (slow decay) -- **Automatic Promotion**: Frequently accessed memories get promoted -- **Conflict Resolution**: LLM detects contradictions and updates old info -- **~45% Storage Reduction**: Compared to store-everything approaches - -### EchoMem: Multi-Modal Encoding - -``` -Input: "User prefers TypeScript over JavaScript" - │ - ▼ -┌─────────────────────────────────────────────────────────┐ -│ Stored Memory │ -├─────────────────────────────────────────────────────────┤ -│ raw: "User prefers TypeScript over JavaScript" │ -│ paraphrase: "TypeScript is the user's preferred..." │ -│ keywords: ["typescript", "javascript", "preference"] │ -│ implications: ["values type safety", "modern tooling"] │ -│ question_form: "What language does the user prefer?" 
│ -│ strength: 1.3x (medium depth) │ -└─────────────────────────────────────────────────────────┘ -``` +### Claude Code -- **Multiple Retrieval Paths**: Keywords, paraphrases, implications, questions -- **Importance-Based Depth**: Critical info gets deeper processing (1.6x strength) -- **Better Query Matching**: Question-form embeddings match search queries -- **Re-Echo on Access**: Accessed memories get stronger encoding - -### CategoryMem: Dynamic Organization - -``` -┌─────────────────────────────────────────────────────────┐ -│ Auto-Generated Categories │ -├─────────────────────────────────────────────────────────┤ -│ │ -│ preferences/ technical/ │ -│ ├── coding/ ├── apis/ │ -│ │ ├── languages (3) │ └── rate_limits (2) │ -│ │ └── tools (2) └── infrastructure (4) │ -│ └── ui (4) │ -│ │ -│ projects/ corrections/ │ -│ └── active (6) └── learned (2) │ -│ │ -└─────────────────────────────────────────────────────────┘ +```bash +engram install # Writes MCP config to ~/.claude.json ``` -- **Dynamic Categories**: Auto-discovered from content, not predefined -- **Category Decay**: Unused categories weaken and merge -- **Category-Aware Search**: Boost results from relevant categories -- **Hierarchical Structure**: Up to 3 levels of nesting - ---- - -## Architecture - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Agent Orchestrator │ -│ (Claude Code / Codex / LangChain / etc.) 
│ -└─────────────────────────────────────────────────────────────────┘ - │ - ┌───────────────┼───────────────┐ - │ │ │ - ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ - │ Agent 1 │ │ Agent 2 │ │ Agent 3 │ - │ (user, │ │ (user, │ │ (user, │ - │ agent) │ │ agent) │ │ agent) │ - └──────────┘ └──────────┘ └──────────┘ - │ │ │ - └───────────────┼───────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ Engram │ -│ ┌───────────────────────────────────────────────────────────┐ │ -│ │ Knowledge Graph Layer │ │ -│ │ (Entity Extraction & Linking) │ │ -│ └───────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌───────────────────────────────────────────────────────────┐ │ -│ │ CategoryMem Layer │ │ -│ │ (Dynamic Hierarchical Organization) │ │ -│ └───────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌───────────────────────────────────────────────────────────┐ │ -│ │ EchoMem Layer │ │ -│ │ (Multi-Modal Encoding & Retrieval) │ │ -│ └───────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌───────────────────────────────────────────────────────────┐ │ -│ │ FadeMem Layer │ │ -│ │ (Decay, Promotion & Consolidation) │ │ -│ └───────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ -│ │ Embedder │ │ LLM │ │ Vector Store │ │ -│ │ (Gemini/ │ │ (Gemini/ │ │ (Qdrant/In-memory) │ │ -│ │ OpenAI/Ollama│ │ OpenAI/Ollama│ │ │ │ -│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ -``` +The optional **Claude Code plugin** adds proactive memory injection (relevant context is loaded before each reply), slash commands (`/engram:remember`, `/engram:search`, `/engram:status`), and standing instructions. 
---- - -## API Reference - -### Engram Class (Simple Interface) - -```python -from engram import Engram - -memory = Engram( - provider="gemini", # or "openai", "ollama" - auto-detected from env - in_memory=False, # True for testing - enable_echo=True, # Multi-modal encoding - enable_categories=True, # Dynamic categorization - enable_graph=True # Knowledge graph for entity linking -) - -# Add memory -memory.add(content, user_id, agent_id=None, categories=None, metadata=None) - -# Search memories -memory.search(query, user_id, agent_id=None, limit=10, categories=None) - -# Get all memories -memory.get_all(user_id, agent_id=None, layer=None, limit=100) - -# Get statistics -memory.stats(user_id=None, agent_id=None) - -# Apply decay (forgetting) -memory.decay(user_id=None, agent_id=None) +```bash +# Activate plugin inside Claude Code: +/plugin install engram-memory --path ~/.engram/claude-plugin ``` -### Memory Class (Full Interface) - -```python -from engram import Memory -from engram.configs.base import MemoryConfig +Requires `engram-api` running for the proactive hook. -config = MemoryConfig( - # Vector store: "qdrant" or "memory" - # LLM: "gemini" or "openai" - # FadeMem, EchoMem, CategoryMem configs -) +### Cursor -memory = Memory(config) +`engram install` writes MCP config to `~/.cursor/mcp.json`. Restart Cursor to load. 
-# All Engram methods plus: -memory.get(memory_id) -memory.update(memory_id, content) -memory.delete(memory_id) -memory.delete_all(user_id=None, agent_id=None) -memory.history(memory_id) -memory.promote(memory_id) # SML → LML -memory.demote(memory_id) # LML → SML -memory.fuse(memory_ids) # Combine related memories - -# Category methods -memory.get_category_tree() -memory.get_all_summaries() -memory.search_by_category(category_id) - -# Knowledge graph methods -memory.get_related_memories(memory_id) # Graph traversal -memory.get_memory_entities(memory_id) # Extracted entities -memory.get_entity_memories(entity_name) # Memories with entity -memory.get_memory_graph(memory_id) # Visualization data -memory.get_graph_stats() # Graph statistics -``` +### OpenAI Codex -### Async Support +`engram install` writes MCP config to `~/.codex/config.toml`. Restart Codex to load. -```python -from engram.memory.async_memory import AsyncMemory +### OpenClaw -async with AsyncMemory() as memory: - await memory.add("User prefers Python", user_id="u1") - results = await memory.search("programming", user_id="u1") -``` +`engram install` deploys the Engram skill to OpenClaw's skills directory. 
--- -## Configuration - -### Environment Variables - -```bash -# LLM & Embeddings (choose one) -export GEMINI_API_KEY="your-key" # Gemini (default) -export OPENAI_API_KEY="your-key" # OpenAI -export OLLAMA_HOST="http://localhost:11434" # Ollama (local, no key needed) +## Architecture -# Optional: Vector store -export QDRANT_HOST="localhost" -export QDRANT_PORT="6333" ``` - -### Full Configuration - -```python -from engram.configs.base import ( - MemoryConfig, - FadeMemConfig, - EchoMemConfig, - CategoryMemConfig, -) - -config = MemoryConfig( - # FadeMem: Decay & consolidation - fadem=FadeMemConfig( - enable_forgetting=True, - sml_decay_rate=0.15, # Short-term decay - lml_decay_rate=0.02, # Long-term decay - promotion_access_threshold=3, - forgetting_threshold=0.1, - ), - - # EchoMem: Multi-modal encoding - echo=EchoMemConfig( - enable_echo=True, - auto_depth=True, - shallow_multiplier=1.0, - medium_multiplier=1.3, - deep_multiplier=1.6, - ), - - # CategoryMem: Dynamic organization - category=CategoryMemConfig( - enable_categories=True, - auto_categorize=True, - enable_category_decay=True, - max_category_depth=3, - ), -) +Agent (Claude Code / Codex / Cursor / LangChain) + │ + ▼ +┌─────────────────────────────────────────────┐ +│ Engram │ +│ │ +│ Knowledge Graph (entity linking) │ +│ CategoryMem (dynamic organization) │ +│ EchoMem (multi-modal encoding) │ +│ FadeMem (decay & consolidation) │ +│ │ +│ Embedder: Gemini / OpenAI / Ollama │ +│ Store: SQLite + in-memory vectors │ +└─────────────────────────────────────────────┘ ``` ---- - -## CLI - -```bash -# Install MCP server for Claude/Cursor/Codex -engram-install - -# Start REST API server -engram-api - -# Start MCP server directly -engram-mcp - -# Interactive commands -engram add "User prefers Python" --user u123 -engram search "programming" --user u123 -engram list --user u123 -engram stats --user u123 -engram decay --user u123 -engram categories --user u123 -engram export --user u123 --output memories.json -engram 
import memories.json --user u123 # Import from Engram/Mem0 format -``` - ---- - -## Research - -Engram is based on the paper: - -> **FadeMem: Biologically-Inspired Forgetting for Efficient Agent Memory** -> -> arXiv:2601.18642 - -### Key Results - -| Metric | Improvement | -|--------|-------------| -| Storage Reduction | ~45% | -| Multi-hop Reasoning | +12% accuracy | -| Retrieval Precision | +8% on LTI-Bench | - -### Biological Inspiration - -- **Ebbinghaus Forgetting Curve** → Exponential decay -- **Spaced Repetition** → Access boosts strength -- **Sleep Consolidation** → SML → LML promotion -- **Production Effect** → Echo encoding improves retention -- **Elaborative Encoding** → Deeper processing = stronger memory +Memories flow through four layers: FadeMem manages lifecycle (decay, promotion, forgetting), EchoMem creates multiple encodings for better retrieval, CategoryMem organizes content into dynamic hierarchies, and the Knowledge Graph links entities across memories. --- @@ -783,22 +236,17 @@ Engram is based on the paper: git clone https://github.com/Ashish-dwi99/Engram.git cd Engram pip install -e ".[dev]" -pytest +pytest tests/ -v ``` --- ## License -MIT License - see [LICENSE](LICENSE) for details. +MIT License — see [LICENSE](LICENSE) for details. ---

Built for AI agents that need to remember what matters.

- -

- GitHub • - Issues -

diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..a9e7bb1 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +services: + engram: + build: . + ports: + - "8100:8100" + volumes: + - engram-data:/data + environment: + - GEMINI_API_KEY=${GEMINI_API_KEY:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - ENGRAM_DATA_DIR=/data + restart: unless-stopped + +volumes: + engram-data: diff --git a/engram/__init__.py b/engram/__init__.py index e0f3c02..7cd6f19 100644 --- a/engram/__init__.py +++ b/engram/__init__.py @@ -21,7 +21,7 @@ from engram.core.echo import EchoProcessor, EchoDepth, EchoResult from engram.configs.base import MemoryConfig, FadeMemConfig, EchoMemConfig, CategoryMemConfig, ScopeConfig -__version__ = "0.2.0" # REST API + Simplified SDK +__version__ = "0.4.0" # Product release: Docker, CI, CLI improvements __all__ = [ # Simplified interface (recommended) "Engram", diff --git a/engram/api/__init__.py b/engram/api/__init__.py index 04b1ec9..21f4808 100644 --- a/engram/api/__init__.py +++ b/engram/api/__init__.py @@ -1,5 +1,6 @@ """Engram REST API module.""" -from engram.api.server import app, run +from engram.api.app import app +from engram.api.server import run __all__ = ["app", "run"] diff --git a/engram/api/app.py b/engram/api/app.py new file mode 100644 index 0000000..fa44ffb --- /dev/null +++ b/engram/api/app.py @@ -0,0 +1,750 @@ +"""Engram v2 REST API application.""" + +from __future__ import annotations + +import logging +import os +import threading +from datetime import date +from typing import Any, Dict, List, Optional, Union + +from fastapi import FastAPI, HTTPException, Query, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from pydantic import BaseModel, Field + +from engram import Memory +from engram.api.auth import ( + enforce_session_issuer, + get_token_from_request, + is_trusted_direct_client, + 
require_session_error, + require_token_for_untrusted_request, +) +from engram.api.schemas import ( + AddMemoryRequestV2, + AgentPolicyUpsertRequest, + CommitResolutionRequest, + ConflictResolutionRequest, + DailyDigestResponse, + NamespaceDeclareRequest, + NamespacePermissionRequest, + SceneSearchRequest, + SearchRequestV2, + SleepRunRequest, + SessionCreateRequest, + SessionCreateResponse, +) +from engram.core.policy import feature_enabled +from engram.exceptions import FadeMemValidationError +from engram.observability import add_metrics_routes, logger as structured_logger, metrics + +logger = logging.getLogger(__name__) + + +# Legacy response models +class SearchResultResponse(BaseModel): + results: List[Dict[str, Any]] + count: int + context_packet: Optional[Dict[str, Any]] = None + + +class StatsResponse(BaseModel): + total_memories: int + sml_count: int + lml_count: int + categories: Dict[str, int] + storage_mb: Optional[float] = None + + +class DecayRequest(BaseModel): + user_id: Optional[str] = Field(default=None) + agent_id: Optional[str] = Field(default=None) + dry_run: bool = Field(default=False) + + +class DecayResponse(BaseModel): + decayed: int + forgotten: int + promoted: int + stale_refs_removed: int = 0 + dry_run: bool + + +app = FastAPI( + title="Engram API", + description="Engram v2 Personal Memory Kernel API", + version="2.0.0", + docs_url="/docs", + redoc_url="/redoc", +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +add_metrics_routes(app) + +_memory: Optional[Memory] = None + + +def get_memory() -> Memory: + global _memory + if _memory is None: + _memory = Memory() + return _memory + + +def get_kernel(): + return get_memory().kernel + + +def _extract_content(messages: Optional[Union[str, List[Dict[str, Any]]]], content: Optional[str]) -> str: + if content is not None: + return str(content) + if isinstance(messages, str): + return messages + if 
isinstance(messages, list): + parts = [] + for msg in messages: + text = msg.get("content") + if text: + parts.append(str(text)) + return "\n".join(parts) + return "" + + +_TOKEN_EXEMPT_PATHS = { + "/health", + "/v1/version", + "/v1/sessions", + "/docs", + "/redoc", + "/openapi.json", +} + + +@app.middleware("http") +async def enforce_capability_token_for_untrusted_clients(request: Request, call_next): + if request.method.upper() == "OPTIONS": + return await call_next(request) + + path = request.url.path.rstrip("/") or "/" + if path.startswith("/static") or path == "/dashboard": + return await call_next(request) + + if path.startswith("/v1") and path not in _TOKEN_EXEMPT_PATHS: + token = get_token_from_request(request) + try: + require_token_for_untrusted_request(request, token) + except HTTPException as exc: + return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + + return await call_next(request) + + +@app.get("/health") +async def health_check(): + return {"status": "healthy", "service": "engram"} + + +@app.get("/v1/version") +async def get_version(): + from engram import __version__ + + return {"version": __version__, "api_version": "v1", "pmk_version": "2.0"} + + +@app.post("/v1/sessions", response_model=SessionCreateResponse) +@app.post("/v1/sessions/", response_model=SessionCreateResponse) +async def create_session(request: SessionCreateRequest, http_request: Request): + enforce_session_issuer(http_request) + kernel = get_kernel() + try: + payload = kernel.create_session( + user_id=request.user_id, + agent_id=request.agent_id, + allowed_confidentiality_scopes=request.allowed_confidentiality_scopes, + capabilities=request.capabilities, + namespaces=request.namespaces, + ttl_minutes=request.ttl_minutes, + ) + return SessionCreateResponse(**payload) + except PermissionError as exc: + raise HTTPException(status_code=403, detail=str(exc)) + + +@app.post("/v1/search", response_model=SearchResultResponse) +@app.post("/v1/search/", 
response_model=SearchResultResponse) +@app.post("/v1/memories/search", response_model=SearchResultResponse) +@app.post("/v1/memories/search/", response_model=SearchResultResponse) +async def search_memories(request: SearchRequestV2, http_request: Request): + with metrics.measure("api_search", user_id=request.user_id): + token = get_token_from_request(http_request) + kernel = get_kernel() + try: + payload = kernel.search( + query=request.query, + user_id=request.user_id, + agent_id=request.agent_id, + token=token, + limit=request.limit, + categories=request.categories, + ) + results = payload.get("results", []) + metrics.record_search(0, results_count=len(results)) + return SearchResultResponse( + results=results, + count=len(results), + context_packet=payload.get("context_packet"), + ) + except PermissionError as exc: + raise require_session_error(exc) + except Exception as exc: + logger.exception("Error searching memories") + raise HTTPException(status_code=500, detail=str(exc)) + + +@app.get("/v1/scenes") +@app.get("/v1/scenes/") +async def list_scenes( + user_id: Optional[str] = Query(default=None), + topic: Optional[str] = Query(default=None), + limit: int = Query(default=50, ge=1, le=500), +): + return {"scenes": get_memory().get_scenes(user_id=user_id, topic=topic, limit=limit)} + + +@app.post("/v1/scenes/search") +@app.post("/v1/scenes/search/") +async def search_scenes(request: SceneSearchRequest, http_request: Request): + token = get_token_from_request(http_request) + kernel = get_kernel() + try: + return kernel.search_scenes( + query=request.query, + user_id=request.user_id, + agent_id=request.agent_id, + token=token, + limit=request.limit, + ) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.get("/v1/scenes/{scene_id}") +@app.get("/v1/scenes/{scene_id}/") +async def get_scene( + scene_id: str, + http_request: Request, + user_id: str = Query(default="default"), + agent_id: Optional[str] = Query(default=None), +): + token = 
get_token_from_request(http_request) + kernel = get_kernel() + try: + scene = kernel.get_scene( + scene_id=scene_id, + user_id=user_id, + agent_id=agent_id, + token=token, + ) + except PermissionError as exc: + raise require_session_error(exc) + + if not scene: + raise HTTPException(status_code=404, detail="Scene not found") + return scene + + +@app.post("/v1/memories", response_model=Dict[str, Any]) +@app.post("/v1/memories/", response_model=Dict[str, Any]) +async def add_memory(request: AddMemoryRequestV2, http_request: Request): + token = get_token_from_request(http_request) + kernel = get_kernel() + content = _extract_content(request.messages, request.content) + if not content: + raise HTTPException(status_code=400, detail="content or messages is required") + + mode = (request.mode or "staging").lower() + if mode not in {"staging", "direct"}: + raise HTTPException(status_code=400, detail="mode must be 'staging' or 'direct'") + + trusted_direct = mode == "direct" and is_trusted_direct_client(http_request) + + try: + return kernel.propose_write( + content=content, + token=token, + user_id=request.user_id, + agent_id=request.agent_id, + categories=request.categories, + metadata=request.metadata, + scope=request.scope or "work", + namespace=request.namespace or "default", + mode=mode, + infer=request.infer, + source_app=request.source_app, + trusted_direct=trusted_direct, + source_type=request.source_type, + source_event_id=request.source_event_id, + ) + except PermissionError as exc: + raise require_session_error(exc) + except Exception as exc: + logger.exception("Error creating proposal/direct memory") + raise HTTPException(status_code=500, detail=str(exc)) + + +@app.get("/v1/staging/commits") +@app.get("/v1/staging/commits/") +async def list_staging_commits( + http_request: Request, + user_id: Optional[str] = Query(default=None), + agent_id: Optional[str] = Query(default=None), + status: Optional[str] = Query(default=None), + limit: int = Query(default=100, 
ge=1, le=500), +): + kernel = get_kernel() + token = get_token_from_request(http_request) + try: + return kernel.list_pending_commits( + user_id=user_id, + agent_id=agent_id, + token=token, + status=status, + limit=limit, + ) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/staging/commits/{commit_id}/approve") +async def approve_commit( + commit_id: str, + http_request: Request, + agent_id: Optional[str] = Query(default=None), +): + kernel = get_kernel() + token = get_token_from_request(http_request) + try: + return kernel.approve_commit(commit_id=commit_id, token=token, agent_id=agent_id) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/staging/commits/{commit_id}/reject") +async def reject_commit( + commit_id: str, + request: CommitResolutionRequest, + http_request: Request, + agent_id: Optional[str] = Query(default=None), +): + kernel = get_kernel() + token = get_token_from_request(http_request) + try: + return kernel.reject_commit(commit_id=commit_id, reason=request.reason, token=token, agent_id=agent_id) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/conflicts/{stash_id}/resolve") +async def resolve_conflict( + stash_id: str, + request: ConflictResolutionRequest, + http_request: Request, + agent_id: Optional[str] = Query(default=None), +): + kernel = get_kernel() + token = get_token_from_request(http_request) + try: + return kernel.resolve_conflict(stash_id=stash_id, resolution=request.resolution, token=token, agent_id=agent_id) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.get("/v1/digest/daily", response_model=DailyDigestResponse) +async def get_daily_digest( + http_request: Request, + user_id: str = Query(default="default"), + agent_id: Optional[str] = Query(default=None), + date_value: Optional[str] = Query(default=None, alias="date"), +): + kernel = get_kernel() + digest_date = date_value or 
date.today().isoformat() + token = get_token_from_request(http_request) + try: + payload = kernel.get_daily_digest(user_id=user_id, date_str=digest_date, token=token, agent_id=agent_id) + return DailyDigestResponse(**payload) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.get("/v1/trust") +async def get_agent_trust( + http_request: Request, + user_id: str = Query(default="default"), + agent_id: str = Query(...), + requester_agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + return get_kernel().get_agent_trust( + user_id=user_id, + agent_id=agent_id, + token=token, + requester_agent_id=requester_agent_id, + ) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.get("/v1/namespaces") +async def list_namespaces( + http_request: Request, + user_id: Optional[str] = Query(default=None), + agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + namespaces = get_kernel().list_namespaces(user_id=user_id, token=token, agent_id=agent_id) + except PermissionError as exc: + raise require_session_error(exc) + return {"namespaces": namespaces, "count": len(namespaces)} + + +@app.post("/v1/namespaces") +async def declare_namespace( + request: NamespaceDeclareRequest, + http_request: Request, + agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + payload = get_kernel().declare_namespace( + user_id=request.user_id, + namespace=request.namespace, + description=request.description, + token=token, + agent_id=agent_id, + ) + return payload + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/namespaces/permissions") +async def grant_namespace_permission( + request: NamespacePermissionRequest, + http_request: Request, + requester_agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + 
payload = get_kernel().grant_namespace_permission( + user_id=request.user_id, + namespace=request.namespace, + agent_id=request.agent_id, + capability=request.capability, + expires_at=request.expires_at, + token=token, + requester_agent_id=requester_agent_id, + ) + return payload + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/agent-policies") +async def upsert_agent_policy( + request: AgentPolicyUpsertRequest, + http_request: Request, + requester_agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + payload = get_kernel().upsert_agent_policy( + user_id=request.user_id, + agent_id=request.agent_id, + allowed_confidentiality_scopes=request.allowed_confidentiality_scopes, + allowed_capabilities=request.allowed_capabilities, + allowed_namespaces=request.allowed_namespaces, + token=token, + requester_agent_id=requester_agent_id, + ) + return payload + except PermissionError as exc: + raise require_session_error(exc) + + +@app.get("/v1/agent-policies") +async def list_agent_policies( + http_request: Request, + user_id: str = Query(default="default"), + agent_id: Optional[str] = Query(default=None), + include_wildcard: bool = Query(default=True), + requester_agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + kernel = get_kernel() + if agent_id: + policy = kernel.get_agent_policy( + user_id=user_id, + agent_id=agent_id, + include_wildcard=include_wildcard, + token=token, + requester_agent_id=requester_agent_id, + ) + return {"policy": policy} + policies = kernel.list_agent_policies( + user_id=user_id, + token=token, + requester_agent_id=requester_agent_id, + ) + return {"policies": policies, "count": len(policies)} + except PermissionError as exc: + raise require_session_error(exc) + + +@app.delete("/v1/agent-policies") +async def delete_agent_policy( + http_request: Request, + user_id: str = Query(default="default"), + 
agent_id: str = Query(...), + requester_agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + return get_kernel().delete_agent_policy( + user_id=user_id, + agent_id=agent_id, + token=token, + requester_agent_id=requester_agent_id, + ) + except PermissionError as exc: + raise require_session_error(exc) + + +@app.post("/v1/sleep/run") +async def run_sleep_cycle( + request: SleepRunRequest, + http_request: Request, + agent_id: Optional[str] = Query(default=None), +): + token = get_token_from_request(http_request) + try: + payload = get_memory().run_sleep_cycle( + user_id=request.user_id, + date_str=request.date, + apply_decay=request.apply_decay, + cleanup_stale_refs=request.cleanup_stale_refs, + token=token, + agent_id=agent_id, + ) + return payload + except PermissionError as exc: + raise require_session_error(exc) + + +# --------------------------------------------------------------------------- +# Legacy compatibility endpoints +# --------------------------------------------------------------------------- + + +@app.get("/v1/memories", response_model=Dict[str, Any]) +@app.get("/v1/memories/", response_model=Dict[str, Any]) +async def list_memories( + user_id: str = Query(default="default"), + agent_id: Optional[str] = Query(default=None), + layer: Optional[str] = Query(default=None), + limit: int = Query(default=50, ge=1, le=500), +): + memory = get_memory() + payload = memory.get_all(user_id=user_id, agent_id=agent_id, layer=layer, limit=limit) + memories = payload.get("results", payload) if isinstance(payload, dict) else payload + return {"memories": memories, "count": len(memories)} + + +@app.get("/v1/memories/{memory_id}", response_model=Dict[str, Any]) +@app.get("/v1/memories/{memory_id}/", response_model=Dict[str, Any]) +async def get_memory_by_id(memory_id: str): + memory = get_memory() + result = memory.get(memory_id) + if result is None: + raise HTTPException(status_code=404, detail="Memory not found") + 
return result + + +@app.put("/v1/memories/{memory_id}", response_model=Dict[str, Any]) +@app.put("/v1/memories/{memory_id}/", response_model=Dict[str, Any]) +async def update_memory(memory_id: str, request: Dict[str, Any]): + memory = get_memory() + result = memory.update(memory_id, request) + return result + + +@app.delete("/v1/memories/{memory_id}") +@app.delete("/v1/memories/{memory_id}/") +async def delete_memory(memory_id: str): + memory = get_memory() + memory.delete(memory_id) + return {"status": "deleted", "id": memory_id} + + +@app.delete("/v1/memories", response_model=Dict[str, Any]) +@app.delete("/v1/memories/", response_model=Dict[str, Any]) +async def delete_memories( + user_id: Optional[str] = Query(default=None), + agent_id: Optional[str] = Query(default=None), + run_id: Optional[str] = Query(default=None), + app_id: Optional[str] = Query(default=None), +): + memory = get_memory() + try: + return memory.delete_all(user_id=user_id, agent_id=agent_id, run_id=run_id, app_id=app_id) + except FadeMemValidationError as exc: + raise HTTPException(status_code=400, detail=exc.message) + + +@app.get("/v1/memories/{memory_id}/history", response_model=List[Dict[str, Any]]) +@app.get("/v1/memories/{memory_id}/history/", response_model=List[Dict[str, Any]]) +async def get_memory_history(memory_id: str): + return get_memory().history(memory_id) + + +@app.post("/v1/decay", response_model=DecayResponse) +async def apply_decay(request: DecayRequest): + memory = get_memory() + if request.dry_run: + return DecayResponse(decayed=0, forgotten=0, promoted=0, stale_refs_removed=0, dry_run=True) + result = memory.apply_decay(scope={"user_id": request.user_id, "agent_id": request.agent_id}) + return DecayResponse( + decayed=result.get("decayed", 0), + forgotten=result.get("forgotten", 0), + promoted=result.get("promoted", 0), + stale_refs_removed=result.get("stale_refs_removed", 0), + dry_run=False, + ) + + +@app.get("/v1/stats", response_model=StatsResponse) +async def 
get_stats( + user_id: Optional[str] = Query(default=None), + agent_id: Optional[str] = Query(default=None), +): + stats = get_memory().get_stats(user_id=user_id, agent_id=agent_id) + return StatsResponse( + total_memories=stats.get("total", 0), + sml_count=stats.get("sml_count", 0), + lml_count=stats.get("lml_count", 0), + categories=stats.get("categories", {}), + storage_mb=stats.get("storage_mb"), + ) + + +@app.get("/v1/categories") +async def list_categories(): + return {"categories": get_memory().get_categories()} + + +@app.get("/v1/categories/tree") +async def get_category_tree(): + return {"tree": get_memory().get_category_tree()} + + +@app.get("/v1/categories/{category_id}/summary") +async def get_category_summary(category_id: str, regenerate: bool = Query(default=False)): + summary = get_memory().get_category_summary(category_id, regenerate=regenerate) + return {"category_id": category_id, "summary": summary} + + +# --------------------------------------------------------------------------- +# Dashboard endpoints +# --------------------------------------------------------------------------- + +@app.get("/v1/profiles") +@app.get("/v1/profiles/") +async def list_profiles( + user_id: Optional[str] = Query(default=None), +): + return {"profiles": get_memory().get_all_profiles(user_id=user_id)} + + +@app.get("/v1/dashboard/constellation") +async def get_constellation( + user_id: Optional[str] = Query(default=None), + limit: int = Query(default=200, ge=1, le=1000), +): + return get_memory().get_constellation_data(user_id=user_id, limit=limit) + + +@app.get("/v1/decay-log") +async def get_decay_log( + limit: int = Query(default=20, ge=1, le=100), +): + return {"entries": get_memory().get_decay_log(limit=limit)} + + +_STATIC_DIR = os.path.join(os.path.dirname(__file__), "static") + + +@app.get("/dashboard") +async def serve_dashboard(): + html_path = os.path.join(_STATIC_DIR, "dashboard.html") + if not os.path.isfile(html_path): + raise 
HTTPException(status_code=404, detail="Dashboard not found") + return FileResponse(html_path, media_type="text/html") + + +# Mount static files last so it doesn't shadow API routes +if os.path.isdir(_STATIC_DIR): + app.mount("/static", StaticFiles(directory=_STATIC_DIR), name="static") + + +_sleep_cycle_thread: Optional[threading.Thread] = None +_sleep_cycle_stop = threading.Event() + + +def _sleep_cycle_worker() -> None: + interval_minutes_raw = os.environ.get("ENGRAM_V2_SLEEP_CYCLE_INTERVAL_MINUTES", "60") + try: + interval_minutes = max(5, int(interval_minutes_raw)) + except Exception: + interval_minutes = 60 + while not _sleep_cycle_stop.is_set(): + try: + get_memory().run_sleep_cycle( + user_id=None, + date_str=None, + apply_decay=feature_enabled("ENGRAM_V2_SLEEP_CYCLE_APPLY_DECAY", default=True), + cleanup_stale_refs=feature_enabled("ENGRAM_V2_SLEEP_CYCLE_REF_GC", default=True), + ) + except Exception: + logger.exception("Sleep cycle background run failed") + if _sleep_cycle_stop.wait(interval_minutes * 60): + break + + +@app.on_event("startup") +async def startup_events(): + global _sleep_cycle_thread + if not feature_enabled("ENGRAM_V2_SLEEP_CYCLE_ENABLED", default=False): + return + if _sleep_cycle_thread and _sleep_cycle_thread.is_alive(): + return + _sleep_cycle_stop.clear() + _sleep_cycle_thread = threading.Thread(target=_sleep_cycle_worker, daemon=True, name="engram-sleep-cycle") + _sleep_cycle_thread.start() + logger.info("Started sleep-cycle background worker") + + +@app.on_event("shutdown") +async def shutdown_events(): + if _sleep_cycle_thread and _sleep_cycle_thread.is_alive(): + _sleep_cycle_stop.set() + _sleep_cycle_thread.join(timeout=2) diff --git a/engram/api/auth.py b/engram/api/auth.py new file mode 100644 index 0000000..25269cb --- /dev/null +++ b/engram/api/auth.py @@ -0,0 +1,76 @@ +"""Auth/session helpers for Engram v2 API.""" + +from __future__ import annotations + +import os +import secrets +from typing import Optional + +from fastapi 
import HTTPException, Request

from engram.core.policy import is_trusted_local_request


# User-Agent substrings treated as first-party CLI tooling.
# NOTE(review): "python-requests" matches ANY requests-based client on
# localhost — confirm this breadth is intentional.
CLI_HINTS = {"engram-cli", "python-requests"}


def extract_bearer_token(authorization: Optional[str]) -> Optional[str]:
    """Return the credential from an ``Authorization: Bearer <token>`` value, else None."""
    if not authorization:
        return None
    scheme, _, credential = authorization.strip().partition(" ")
    if scheme.lower() != "bearer" or not credential:
        return None
    credential = credential.strip()
    return credential or None


def get_token_from_request(request: Request) -> Optional[str]:
    """Pull the bearer token (if any) off the incoming request."""
    return extract_bearer_token(request.headers.get("Authorization"))


def is_trusted_local_client(request: Request) -> bool:
    """Delegate the peer-address trust decision to the core policy layer."""
    host = request.client.host if request.client else None
    return is_trusted_local_request(host)


def is_trusted_direct_client(request: Request) -> bool:
    """True when a trusted local caller also identifies as CLI tooling."""
    if not is_trusted_local_client(request):
        return False

    # Explicit header override beats User-Agent sniffing.
    hint = (request.headers.get("X-Engram-Client") or "").strip().lower()
    agent = (request.headers.get("User-Agent") or "").lower()
    return hint == "cli" or any(marker in agent for marker in CLI_HINTS)


def require_token_for_untrusted_request(request: Request, token: Optional[str]) -> None:
    """Raise 401 unless a token was supplied or the caller is trusted-local."""
    if token or is_trusted_local_client(request):
        return
    raise HTTPException(status_code=401, detail="Bearer capability token required")


def enforce_session_issuer(request: Request) -> None:
    """Restrict session minting to trusted local callers, optionally gated on an admin secret."""
    if not is_trusted_local_client(request):
        raise HTTPException(status_code=403, detail="Session creation allowed only from local trusted clients")

    expected = (os.environ.get("ENGRAM_ADMIN_KEY") or "").strip()
    if not expected:
        # No admin secret configured: local trust alone suffices.
        return

    supplied = (request.headers.get("X-Engram-Admin-Key") or "").strip()
    # Constant-time comparison to avoid timing side channels.
    if not supplied or not secrets.compare_digest(supplied, expected):
        raise
HTTPException(status_code=403, detail="Invalid admin key for session creation") + + +def require_session_error(exc: Exception) -> HTTPException: + return HTTPException(status_code=401, detail=str(exc)) diff --git a/engram/api/schemas.py b/engram/api/schemas.py new file mode 100644 index 0000000..2e6534f --- /dev/null +++ b/engram/api/schemas.py @@ -0,0 +1,105 @@ +"""Pydantic schemas for Engram v2 API.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel, Field + +from engram.core.policy import ALL_CONFIDENTIALITY_SCOPES, DEFAULT_CAPABILITIES + + +class SessionCreateRequest(BaseModel): + user_id: str = Field(default="default") + agent_id: Optional[str] = Field(default=None) + allowed_confidentiality_scopes: List[str] = Field(default_factory=lambda: ["work"]) + capabilities: List[str] = Field(default_factory=lambda: list(DEFAULT_CAPABILITIES)) + namespaces: List[str] = Field(default_factory=lambda: ["default"]) + ttl_minutes: int = Field(default=24 * 60, ge=1, le=60 * 24 * 30) + + +class SessionCreateResponse(BaseModel): + session_id: str + token: str + expires_at: str + allowed_confidentiality_scopes: List[str] + capabilities: List[str] + namespaces: List[str] + + +class SearchRequestV2(BaseModel): + query: str + user_id: str = Field(default="default") + agent_id: Optional[str] = Field(default=None) + limit: int = Field(default=10, ge=1, le=100) + categories: Optional[List[str]] = Field(default=None) + + +class AddMemoryRequestV2(BaseModel): + content: Optional[str] = Field(default=None) + messages: Optional[Union[str, List[Dict[str, Any]]]] = Field(default=None) + user_id: str = Field(default="default") + agent_id: Optional[str] = Field(default=None) + metadata: Optional[Dict[str, Any]] = Field(default=None) + categories: Optional[List[str]] = Field(default=None) + scope: Optional[str] = Field(default="work") + namespace: Optional[str] = Field(default="default") + mode: str = 
Field(default="staging", description="staging|direct")
    infer: bool = Field(default=False)
    source_app: Optional[str] = Field(default=None)
    source_type: str = Field(default="rest")
    source_event_id: Optional[str] = Field(default=None)


class SceneSearchRequest(BaseModel):
    """Semantic search request over episodic scenes."""
    query: str
    user_id: str = Field(default="default")
    agent_id: Optional[str] = Field(default=None)
    limit: int = Field(default=10, ge=1, le=100)


class CommitResolutionRequest(BaseModel):
    """Optional reviewer note attached when rejecting a staged commit."""
    reason: Optional[str] = Field(default=None)


class ConflictResolutionRequest(BaseModel):
    """Chosen outcome for a stashed write conflict."""
    resolution: str = Field(description="UNRESOLVED|KEEP_EXISTING|ACCEPT_PROPOSED|KEEP_BOTH")


class DailyDigestResponse(BaseModel):
    """Digest of notable memory events for a single day and user."""
    date: str
    user_id: str
    top_conflicts: List[Dict[str, Any]]
    top_proposed_consolidations: List[Dict[str, Any]]
    scene_highlights: List[Dict[str, Any]] = Field(default_factory=list)


class SleepRunRequest(BaseModel):
    """Parameters for a manually triggered sleep/consolidation cycle."""
    user_id: Optional[str] = Field(default=None)
    date: Optional[str] = Field(default=None)
    apply_decay: bool = Field(default=True)
    cleanup_stale_refs: bool = Field(default=True)


class NamespaceDeclareRequest(BaseModel):
    """Declare (create or re-describe) a memory namespace for a user."""
    user_id: str = Field(default="default")
    namespace: str
    description: Optional[str] = Field(default=None)


class NamespacePermissionRequest(BaseModel):
    """Grant an agent a capability on a namespace, optionally time-boxed."""
    user_id: str = Field(default="default")
    namespace: str
    agent_id: str
    capability: str = Field(default="read")
    expires_at: Optional[str] = Field(default=None)


class AgentPolicyUpsertRequest(BaseModel):
    """Create or replace the access policy for a single agent."""
    user_id: str = Field(default="default")
    agent_id: str
    allowed_confidentiality_scopes: List[str] = Field(
        default_factory=lambda: list(ALL_CONFIDENTIALITY_SCOPES)
    )
    allowed_capabilities: List[str] = Field(default_factory=lambda: list(DEFAULT_CAPABILITIES))
    allowed_namespaces: List[str] = Field(default_factory=lambda: ["default"])
diff --git a/engram/api/server.py b/engram/api/server.py
index bbaaaea..993cb6e 100644
--- a/engram/api/server.py
+++ b/engram/api/server.py @@ -1,438 +1,11 @@ -"""Engram REST API Server. +"""Compatibility wrapper for Engram API server. -FastAPI-based HTTP server for the Engram memory layer. -Provides a standard REST interface for memory operations. - -Usage: - engram-api # Start server on default port 8100 - engram-api --port 8080 # Custom port - engram-api --host 0.0.0.0 # Bind to all interfaces +v2 implementation lives in ``engram.api.app``. """ from __future__ import annotations -import logging -import os -from typing import Any, Dict, List, Optional, Union - -from fastapi import FastAPI, HTTPException, Query -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel, Field - -from engram import Memory, MemoryConfig -from engram.exceptions import FadeMemValidationError -from engram.observability import metrics, logger as structured_logger, add_metrics_routes - -logger = logging.getLogger(__name__) - -# API Models -class AddMemoryRequest(BaseModel): - """Request model for adding memories.""" - content: Optional[str] = Field(default=None, description="Memory content to store") - messages: Optional[Union[str, List[Dict[str, Any]]]] = Field( - default=None, - description="Alias for content or a list of chat messages", - ) - user_id: Optional[str] = Field(default="default", description="User identifier") - agent_id: Optional[str] = Field(default=None, description="Agent identifier") - metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata") - categories: Optional[List[str]] = Field(default=None, description="Category tags") - agent_category: Optional[str] = Field(default=None, description="Agent category for scope sharing") - connector_id: Optional[str] = Field(default=None, description="Connector identifier for scope sharing") - scope: Optional[str] = Field(default=None, description="Memory scope (agent|connector|category|global)") - source_app: Optional[str] = Field(default=None, description="Source application 
identifier") - infer: bool = Field(default=True, description="Whether to extract facts from content") - - -class SearchRequest(BaseModel): - """Request model for searching memories.""" - query: str = Field(..., description="Search query") - user_id: Optional[str] = Field(default="default", description="User identifier") - agent_id: Optional[str] = Field(default=None, description="Agent identifier") - agent_category: Optional[str] = Field(default=None, description="Agent category for scope sharing") - limit: int = Field(default=10, ge=1, le=100, description="Max results to return") - categories: Optional[List[str]] = Field(default=None, description="Filter by categories") - connector_ids: Optional[List[str]] = Field(default=None, description="Connector IDs to include") - scope_filter: Optional[List[str]] = Field(default=None, description="Restrict to specific scopes") - scope: Optional[Union[str, List[str]]] = Field( - default=None, - description="Alias for scope_filter (agent|connector|category|global)", - ) - - -class UpdateMemoryRequest(BaseModel): - """Request model for updating a memory.""" - content: Optional[str] = Field(default=None, description="New content") - data: Optional[str] = Field(default=None, description="Alias for content") - metadata: Optional[Dict[str, Any]] = Field(default=None, description="Metadata to merge") - - -class DecayRequest(BaseModel): - """Request model for applying decay.""" - user_id: Optional[str] = Field(default=None, description="Scope to specific user") - agent_id: Optional[str] = Field(default=None, description="Scope to specific agent") - dry_run: bool = Field(default=False, description="Preview without applying changes") - - -class MemoryResponse(BaseModel): - """Response model for a single memory.""" - id: str - content: str - user_id: Optional[str] = None - agent_id: Optional[str] = None - metadata: Dict[str, Any] = Field(default_factory=dict) - categories: List[str] = Field(default_factory=list) - layer: str = "sml" - 
strength: float = 1.0 - created_at: Optional[str] = None - - -class SearchResultResponse(BaseModel): - """Response model for search results.""" - results: List[Dict[str, Any]] - count: int - - -class StatsResponse(BaseModel): - """Response model for memory statistics.""" - total_memories: int - sml_count: int - lml_count: int - categories: Dict[str, int] - storage_mb: Optional[float] = None - - -class DecayResponse(BaseModel): - """Response model for decay operation.""" - decayed: int - forgotten: int - promoted: int - dry_run: bool - - -# Initialize FastAPI app -app = FastAPI( - title="Engram API", - description="Bio-inspired memory layer for AI agents with forgetting, echo encoding, and dynamic categories", - version="0.1.0", - docs_url="/docs", - redoc_url="/redoc", -) - -# CORS middleware for browser access -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Add metrics endpoints (/metrics, /metrics/json) -add_metrics_routes(app) - -# Global memory instance (initialized on startup) -_memory: Optional[Memory] = None - - -def get_memory() -> Memory: - """Get or create the global Memory instance.""" - global _memory - if _memory is None: - _memory = Memory() - return _memory - - -# Health check -@app.get("/health") -async def health_check(): - """Health check endpoint.""" - return {"status": "healthy", "service": "engram"} - - -@app.get("/v1/version") -async def get_version(): - """Get API version.""" - from engram import __version__ - return {"version": __version__, "api_version": "v1"} - - -# Memory CRUD operations -@app.post("/v1/memories", response_model=Dict[str, Any]) -@app.post("/v1/memories/", response_model=Dict[str, Any]) -async def add_memory(request: AddMemoryRequest): - """Add a new memory. - - Stores content with optional metadata and categories. - If infer=True (default), extracts facts from the content. 
- """ - with metrics.measure("api_add", user_id=request.user_id): - try: - memory = get_memory() - messages = request.content if request.content is not None else request.messages - if messages is None: - raise HTTPException(status_code=400, detail="content or messages is required") - result = memory.add( - messages=messages, - user_id=request.user_id, - agent_id=request.agent_id, - metadata=request.metadata, - categories=request.categories, - agent_category=request.agent_category, - connector_id=request.connector_id, - scope=request.scope, - source_app=request.source_app, - infer=request.infer, - ) - metrics.record_add(0, count=len(result.get("results", [1]))) - return result - except Exception as e: - logger.error(f"Error adding memory: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.get("/v1/memories", response_model=Dict[str, Any]) -@app.get("/v1/memories/", response_model=Dict[str, Any]) -async def list_memories( - user_id: str = Query(default="default", description="User identifier"), - agent_id: Optional[str] = Query(default=None, description="Agent identifier"), - layer: Optional[str] = Query(default=None, description="Filter by layer (sml/lml)"), - limit: int = Query(default=50, ge=1, le=500, description="Max results"), -): - """List all memories for a user/agent.""" - try: - memory = get_memory() - payload = memory.get_all( - user_id=user_id, - agent_id=agent_id, - layer=layer, - limit=limit, - ) - memories = payload.get("results", payload) if isinstance(payload, dict) else payload - return {"memories": memories, "count": len(memories)} - except Exception as e: - logger.error(f"Error listing memories: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.get("/v1/memories/{memory_id}", response_model=Dict[str, Any]) -@app.get("/v1/memories/{memory_id}/", response_model=Dict[str, Any]) -async def get_memory_by_id(memory_id: str): - """Get a specific memory by ID.""" - try: - memory = get_memory() - result = 
memory.get(memory_id) - if result is None: - raise HTTPException(status_code=404, detail="Memory not found") - return result - except HTTPException: - raise - except Exception as e: - logger.error(f"Error getting memory: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.put("/v1/memories/{memory_id}", response_model=Dict[str, Any]) -@app.put("/v1/memories/{memory_id}/", response_model=Dict[str, Any]) -async def update_memory(memory_id: str, request: UpdateMemoryRequest): - """Update an existing memory.""" - try: - memory = get_memory() - update_data = {} - content = request.content if request.content is not None else request.data - if content is not None: - update_data["content"] = content - if request.metadata is not None: - update_data["metadata"] = request.metadata - - if not update_data: - raise HTTPException(status_code=400, detail="No update data provided") - - result = memory.update(memory_id, update_data) - return result - except HTTPException: - raise - except Exception as e: - logger.error(f"Error updating memory: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.delete("/v1/memories/{memory_id}") -@app.delete("/v1/memories/{memory_id}/") -async def delete_memory(memory_id: str): - """Delete a memory by ID.""" - try: - memory = get_memory() - memory.delete(memory_id) - return {"status": "deleted", "id": memory_id} - except Exception as e: - logger.error(f"Error deleting memory: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.delete("/v1/memories", response_model=Dict[str, Any]) -@app.delete("/v1/memories/", response_model=Dict[str, Any]) -async def delete_memories( - user_id: Optional[str] = Query(default=None, description="User identifier"), - agent_id: Optional[str] = Query(default=None, description="Agent identifier"), - run_id: Optional[str] = Query(default=None, description="Run identifier"), - app_id: Optional[str] = Query(default=None, description="App identifier"), -): - """Delete all 
memories matching filters.""" - try: - memory = get_memory() - result = memory.delete_all( - user_id=user_id, - agent_id=agent_id, - run_id=run_id, - app_id=app_id, - ) - return result - except FadeMemValidationError as e: - raise HTTPException(status_code=400, detail=e.message) - except Exception as e: - logger.error(f"Error deleting memories: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.get("/v1/memories/{memory_id}/history", response_model=List[Dict[str, Any]]) -@app.get("/v1/memories/{memory_id}/history/", response_model=List[Dict[str, Any]]) -async def get_memory_history(memory_id: str): - """Get history for a specific memory.""" - try: - memory = get_memory() - return memory.history(memory_id) - except Exception as e: - logger.error(f"Error getting memory history: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -# Search -@app.post("/v1/search", response_model=SearchResultResponse) -@app.post("/v1/search/", response_model=SearchResultResponse) -@app.post("/v1/memories/search", response_model=SearchResultResponse) -@app.post("/v1/memories/search/", response_model=SearchResultResponse) -async def search_memories(request: SearchRequest): - """Search memories using semantic similarity. - - Uses vector search with optional category filtering. - Results are ranked by composite score (similarity * strength). 
- """ - with metrics.measure("api_search", user_id=request.user_id): - try: - memory = get_memory() - payload = memory.search( - query=request.query, - user_id=request.user_id, - agent_id=request.agent_id, - limit=request.limit, - categories=request.categories, - agent_category=request.agent_category, - connector_ids=request.connector_ids, - scope_filter=request.scope_filter or request.scope, - ) - results = payload.get("results", payload) if isinstance(payload, dict) else payload - metrics.record_search(0, results_count=len(results)) - return SearchResultResponse( - results=results, - count=len(results), - ) - except Exception as e: - logger.error(f"Error searching memories: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -# Decay operations -@app.post("/v1/decay", response_model=DecayResponse) -async def apply_decay(request: DecayRequest): - """Apply memory decay (forgetting). - - Uses Ebbinghaus curve to decay memory strength. - Memories below threshold are forgotten. - High-access memories may be promoted to long-term. 
- """ - try: - memory = get_memory() - - if request.dry_run: - # TODO: Implement dry-run preview - return DecayResponse(decayed=0, forgotten=0, promoted=0, dry_run=True) - - result = memory.apply_decay( - user_id=request.user_id, - agent_id=request.agent_id, - ) - return DecayResponse( - decayed=result.get("decayed", 0), - forgotten=result.get("forgotten", 0), - promoted=result.get("promoted", 0), - dry_run=False, - ) - except Exception as e: - logger.error(f"Error applying decay: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -# Statistics -@app.get("/v1/stats", response_model=StatsResponse) -async def get_stats( - user_id: Optional[str] = Query(default=None, description="Filter by user"), - agent_id: Optional[str] = Query(default=None, description="Filter by agent"), -): - """Get memory statistics.""" - try: - memory = get_memory() - stats = memory.get_stats(user_id=user_id, agent_id=agent_id) - return StatsResponse( - total_memories=stats.get("total", 0), - sml_count=stats.get("sml_count", 0), - lml_count=stats.get("lml_count", 0), - categories=stats.get("categories", {}), - storage_mb=stats.get("storage_mb"), - ) - except Exception as e: - logger.error(f"Error getting stats: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -# Category operations -@app.get("/v1/categories") -async def list_categories(): - """List all categories with hierarchy.""" - try: - memory = get_memory() - categories = memory.get_categories() - return {"categories": categories} - except Exception as e: - logger.error(f"Error listing categories: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@app.get("/v1/categories/tree") -async def get_category_tree(): - """Get category tree structure.""" - try: - memory = get_memory() - tree = memory.get_category_tree() - return {"tree": tree} - except Exception as e: - logger.error(f"Error getting category tree: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - 
-@app.get("/v1/categories/{category_id}/summary") -async def get_category_summary( - category_id: str, - regenerate: bool = Query(default=False, description="Force regenerate summary"), -): - """Get AI-generated summary for a category.""" - try: - memory = get_memory() - summary = memory.get_category_summary(category_id, regenerate=regenerate) - return {"category_id": category_id, "summary": summary} - except Exception as e: - logger.error(f"Error getting category summary: {e}") - raise HTTPException(status_code=500, detail=str(e)) +from engram.api.app import app def run(): @@ -450,7 +23,7 @@ def run(): print(f"API docs available at http://{args.host}:{args.port}/docs") uvicorn.run( - "engram.api.server:app", + "engram.api.app:app", host=args.host, port=args.port, reload=args.reload, diff --git a/engram/api/static/dashboard.html b/engram/api/static/dashboard.html new file mode 100644 index 0000000..fc0bc8b --- /dev/null +++ b/engram/api/static/dashboard.html @@ -0,0 +1,736 @@ + + + + + +Engram Memory Visualizer + + + + + + + + +
+
+ + +
+ +
+ +
+
+
+

Memory Layers

+

Top Categories

+

Decay History

+
+
+ + +
+
+
+ +
+ +
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+
+
+
+
"""Invariant validation for staged writes."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple


_EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
_NAME_RE = re.compile(r"\b(?:my\s+name\s+is|name:)\s*([A-Za-z][A-Za-z\s'-]{1,80})", re.IGNORECASE)
_LOCATION_RE = re.compile(r"\b(?:i\s+live\s+in|based\s+in|location:)\s*([A-Za-z][A-Za-z\s'-]{1,80})", re.IGNORECASE)
_SECRET_RE = re.compile(r"\b(password|api[_\s-]?key|secret|access token|private key)\b", re.IGNORECASE)


@dataclass
class InvariantConflict:
    """A mismatch between a stored invariant value and a newly proposed one."""

    key: str
    existing: str
    proposed: str


class InvariantEngine:
    """Screens proposed memory writes against stored user invariants.

    Flags exact duplicates, conflicting identity facts, and likely
    secret/PII content, producing a risk score for the staging pipeline.
    """

    def __init__(self, db):
        # ``db`` must expose get_all_memories / get_invariant / upsert_invariant.
        self.db = db

    def evaluate_add(self, *, user_id: str, content: str) -> Dict[str, Any]:
        """Assess the risk of adding ``content`` as a new memory.

        Returns a dict with keys: ``invariants_ok`` (bool), ``conflicts``
        (list of key/existing/proposed dicts), ``risk_score`` (0.0-1.0),
        ``duplicate_of`` (memory id or None), and ``pii_risk`` (bool).
        """
        report: Dict[str, Any] = {
            "invariants_ok": True,
            "conflicts": [],
            "risk_score": 0.0,
            "duplicate_of": None,
            "pii_risk": False,
        }

        # Exact-duplicate detection (case/whitespace-insensitive).
        wanted = (content or "").strip().lower()
        rows = self.db.get_all_memories(user_id=user_id, include_tombstoned=False)
        if wanted:
            match = next(
                (row for row in rows
                 if (row.get("memory") or "").strip().lower() == wanted),
                None,
            )
            if match is not None:
                report["duplicate_of"] = match.get("id")
                report["risk_score"] = max(report["risk_score"], 0.35)

        # Compare extracted identity facts against stored invariants.
        clashes: List[InvariantConflict] = []
        for inv_key, candidate in self.extract_invariant_pairs(content):
            stored = self.db.get_invariant(user_id, inv_key)
            if not stored:
                continue
            stored_value = str(stored.get("invariant_value", "")).strip()
            candidate_value = str(candidate).strip()
            if stored_value and stored_value.lower() != candidate_value.lower():
                clashes.append(
                    InvariantConflict(
                        key=inv_key,
                        existing=stored_value,
                        proposed=candidate_value,
                    )
                )

        if clashes:
            report["invariants_ok"] = False
            report["conflicts"] = [
                {"key": c.key, "existing": c.existing, "proposed": c.proposed}
                for c in clashes
            ]
            report["risk_score"] = max(report["risk_score"], 0.72)

        # Secret/credential keywords push the risk near the top of the scale.
        if _SECRET_RE.search(content or ""):
            report["pii_risk"] = True
            report["risk_score"] = max(report["risk_score"], 0.85)

        # A completely clean write still carries a small baseline risk.
        if not clashes and not report["pii_risk"] and report["duplicate_of"] is None:
            report["risk_score"] = max(report["risk_score"], 0.15)

        return report

    def extract_invariant_pairs(self, content: str) -> List[Tuple[str, str]]:
        """Pull (invariant_key, value) identity facts out of free text.

        NOTE(review): the name/location patterns capture greedily across
        spaces, so "my name is Alice and I like tea" captures the whole
        trailing phrase — confirm this is acceptable upstream.
        """
        text = content or ""
        found: List[Tuple[str, str]] = []
        # First match wins for each fact; order fixes the output ordering.
        for inv_key, pattern, group_idx in (
            ("identity.name", _NAME_RE, 1),
            ("identity.primary_email", _EMAIL_RE, 0),
            ("identity.location", _LOCATION_RE, 1),
        ):
            hit = pattern.search(text)
            if hit:
                found.append((inv_key, hit.group(group_idx).strip()))
        return found

    def upsert_invariants_from_content(self, *, user_id: str, content: str, source_memory_id: Optional[str]) -> None:
        """Persist every identity fact found in ``content`` as an invariant."""
        for inv_key, inv_value in self.extract_invariant_pairs(content):
            self.db.upsert_invariant(
                user_id=user_id,
                invariant_key=inv_key,
                invariant_value=inv_value,
                category="identity",
                confidence=0.9,
                source_memory_id=source_memory_id,
            )
"""Personal Memory Kernel (PMK) orchestrator for Engram v2."""

from __future__ import annotations

import hashlib
import os
import secrets
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Set

from engram.core.invariants import InvariantEngine
from engram.core.policy import (
    CONFIDENTIALITY_SCOPES,
    DEFAULT_CAPABILITIES,
    default_allowed_scopes,
    detect_confidentiality_scope,
    enforce_scope_on_results,
    feature_enabled,
    normalize_confidentiality_scope,
    token_required_for_agent,
)
from engram.core.provenance import build_provenance
from engram.decay.refcounts import RefCountManager
from engram.memory.episodic_store import EpisodicStore
from engram.memory.staging_store import StagingStore
from engram.observability import metrics
from engram.retrieval.dual_search import DualSearchEngine


class PersonalMemoryKernel:
    """Coordinates policy, retrieval, and staged writes for v2."""

    def __init__(self, memory):
        # The kernel wraps an existing memory facade and shares its DB handle.
        self.memory = memory
        self.db = memory.db
        self.staging_store = StagingStore(self.db)
        self.invariant_engine = InvariantEngine(self.db)
        self.ref_manager = RefCountManager(self.db)
        self.episodic_store = EpisodicStore(self.db, memory.embedder)
        self.dual_search = DualSearchEngine(
            memory=memory,
            episodic_store=self.episodic_store,
            ref_manager=self.ref_manager,
        )

    # ------------------------------------------------------------------
    # Sessions / auth
    # ------------------------------------------------------------------

    def create_session(
        self,
        *,
        user_id: str,
        agent_id: Optional[str],
        allowed_confidentiality_scopes: Optional[List[str]] = None,
        capabilities: Optional[List[str]] = None,
        namespaces: Optional[List[str]] = None,
        ttl_minutes: int = 24 * 60,
    ) -> Dict[str, Any]:
        """Mint a capability-token session for a user (optionally an agent).

        Scopes, capabilities, and namespaces are normalized, then clamped by
        the agent's policy when one exists. Only the SHA-256 hash of the
        token is stored; the plaintext token is returned once to the caller.
        Raises PermissionError when the agent policy denies the request.
        """
        requested = allowed_confidentiality_scopes or ["work"]
        scope_set = {
            normalize_confidentiality_scope(scope)
            for scope in requested
            if normalize_confidentiality_scope(scope) in CONFIDENTIALITY_SCOPES
        }
        scopes = sorted(scope_set) or ["work"]

        cap_set = {
            str(cap).strip().lower()
            for cap in (capabilities or DEFAULT_CAPABILITIES)
            if str(cap).strip()
        }
        caps = sorted(cap_set) or list(DEFAULT_CAPABILITIES)

        session_namespaces = self._resolve_session_namespaces(
            user_id=user_id,
            agent_id=agent_id,
            namespaces=namespaces,
        )

        policy = None
        if agent_id:
            policy = self.db.get_agent_policy(
                user_id=user_id,
                agent_id=agent_id,
                include_wildcard=True,
            )
            # Optionally refuse agents that have no explicit policy row.
            if feature_enabled("ENGRAM_V2_REQUIRE_AGENT_POLICY", default=False) and not policy:
                raise PermissionError(
                    f"No agent policy configured for user={user_id} agent={agent_id}"
                )

        if policy:
            scopes = self._clamp_scopes_with_policy(
                requested_scopes=scopes,
                policy_scopes=policy.get("allowed_confidentiality_scopes", []),
                user_id=user_id,
                agent_id=agent_id,
            )
            caps = self._clamp_capabilities_with_policy(
                requested_capabilities=caps,
                policy_capabilities=policy.get("allowed_capabilities", []),
                user_id=user_id,
                agent_id=agent_id,
            )
            session_namespaces = self._clamp_namespaces_with_policy(
                requested_namespaces=session_namespaces,
                policy_namespaces=policy.get("allowed_namespaces", []),
                user_id=user_id,
                agent_id=agent_id,
            )

        # Make sure every concrete namespace row exists ("*" is a wildcard).
        for namespace in session_namespaces:
            if namespace != "*":
                self.db.ensure_namespace(user_id=user_id, name=namespace)

        token = secrets.token_urlsafe(32)
        token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest()
        expires_at = (
            datetime.utcnow() + timedelta(minutes=max(1, ttl_minutes))
        ).isoformat()

        session_id = self.db.create_session(
            {
                "token_hash": token_hash,
                "user_id": user_id,
                "agent_id": agent_id,
                "allowed_confidentiality_scopes": scopes,
                "capabilities": caps,
                "namespaces": session_namespaces,
                "expires_at": expires_at,
            }
        )
        return {
            "session_id": session_id,
            "token": token,
            "expires_at": expires_at,
            "allowed_confidentiality_scopes": scopes,
            "capabilities": caps,
            "namespaces": session_namespaces,
        }
+ + def authenticate_session( + self, + *, + token: Optional[str], + user_id: Optional[str], + agent_id: Optional[str], + require_for_agent: bool = True, + required_capabilities: Optional[List[str]] = None, + ) -> Optional[Dict[str, Any]]: + if not token: + if require_for_agent and token_required_for_agent(agent_id): + raise PermissionError("Capability token required for agent access") + return None + + token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest() + session = self.db.get_session_by_token_hash(token_hash) + if not session: + raise PermissionError("Invalid capability token") + + revoked_at = session.get("revoked_at") + if revoked_at: + raise PermissionError("Session has been revoked") + + expires_at = session.get("expires_at") + if expires_at: + exp_dt = datetime.fromisoformat(expires_at) + if datetime.utcnow() > exp_dt: + raise PermissionError("Session expired") + + if user_id and session.get("user_id") not in {None, user_id}: + raise PermissionError("Session user scope mismatch") + + if agent_id and session.get("agent_id") and session.get("agent_id") != agent_id: + raise PermissionError("Session agent scope mismatch") + + required_caps = [str(cap).strip().lower() for cap in (required_capabilities or []) if str(cap).strip()] + if required_caps: + session_caps = {str(cap).strip().lower() for cap in (session.get("capabilities") or []) if str(cap).strip()} + if "*" not in session_caps: + missing = [cap for cap in required_caps if cap not in session_caps] + if missing: + missing_str = ", ".join(sorted(set(missing))) + raise PermissionError(f"Session missing required capability: {missing_str}") + + return session + + @staticmethod + def _normalize_namespace(value: Optional[str]) -> str: + ns = str(value or "default").strip() + return ns or "default" + + def _resolve_session_namespaces( + self, + *, + user_id: str, + agent_id: Optional[str], + namespaces: Optional[List[str]], + ) -> List[str]: + if namespaces: + resolved = 
sorted({self._normalize_namespace(ns) for ns in namespaces if str(ns).strip()}) + elif agent_id: + resolved = self.db.get_agent_allowed_namespaces(user_id=user_id, agent_id=agent_id, capability="read") + else: + resolved = ["default"] + if not resolved: + resolved = ["default"] + return resolved + + @staticmethod + def _normalize_policy_namespaces(namespaces: Optional[List[str]]) -> List[str]: + values = sorted({str(namespace).strip() for namespace in (namespaces or []) if str(namespace).strip()}) + return values + + @staticmethod + def _normalize_policy_capabilities(capabilities: Optional[List[str]]) -> List[str]: + values = sorted( + { + str(capability).strip().lower() + for capability in (capabilities or []) + if str(capability).strip() + } + ) + return values + + @staticmethod + def _normalize_policy_scopes(scopes: Optional[List[str]]) -> List[str]: + values = sorted( + { + normalize_confidentiality_scope(scope) + for scope in (scopes or []) + if normalize_confidentiality_scope(scope) in CONFIDENTIALITY_SCOPES + } + ) + return values + + def _clamp_scopes_with_policy( + self, + *, + requested_scopes: List[str], + policy_scopes: Optional[List[str]], + user_id: str, + agent_id: Optional[str], + ) -> List[str]: + allowed = self._normalize_policy_scopes(policy_scopes) + if "*" in set(str(scope).strip() for scope in (policy_scopes or [])): + return requested_scopes + if not allowed: + raise PermissionError( + f"Agent policy denies confidentiality scopes for user={user_id} agent={agent_id}" + ) + clamped = [scope for scope in requested_scopes if scope in set(allowed)] + if not clamped: + raise PermissionError( + f"Requested confidentiality scopes are not allowed by policy for user={user_id} agent={agent_id}" + ) + return sorted(set(clamped)) + + def _clamp_capabilities_with_policy( + self, + *, + requested_capabilities: List[str], + policy_capabilities: Optional[List[str]], + user_id: str, + agent_id: Optional[str], + ) -> List[str]: + allowed = 
self._normalize_policy_capabilities(policy_capabilities) + if "*" in set(allowed): + return sorted(set(requested_capabilities)) + if not allowed: + raise PermissionError( + f"Agent policy denies capabilities for user={user_id} agent={agent_id}" + ) + clamped = [capability for capability in requested_capabilities if capability in set(allowed)] + if not clamped: + raise PermissionError( + f"Requested capabilities are not allowed by policy for user={user_id} agent={agent_id}" + ) + return sorted(set(clamped)) + + def _clamp_namespaces_with_policy( + self, + *, + requested_namespaces: List[str], + policy_namespaces: Optional[List[str]], + user_id: str, + agent_id: Optional[str], + ) -> List[str]: + allowed = self._normalize_policy_namespaces(policy_namespaces) + if "*" in set(allowed): + return sorted(set(requested_namespaces)) + if not allowed: + raise PermissionError(f"Agent policy denies namespaces for user={user_id} agent={agent_id}") + clamped = [namespace for namespace in requested_namespaces if namespace in set(allowed)] + if not clamped: + raise PermissionError( + f"Requested namespaces are not allowed by policy for user={user_id} agent={agent_id}" + ) + return sorted(set(clamped)) + + def _resolve_allowed_namespaces( + self, + *, + session: Optional[Dict[str, Any]], + user_id: str, + agent_id: Optional[str], + capability: str, + ) -> List[str]: + if not feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True): + return ["*"] + if session and session.get("namespaces"): + return sorted({self._normalize_namespace(ns) for ns in session.get("namespaces", [])}) + if agent_id: + return self.db.get_agent_allowed_namespaces(user_id=user_id, agent_id=agent_id, capability=capability) + return ["*"] + + @staticmethod + def _is_namespace_allowed(namespace: str, allowed_namespaces: List[str]) -> bool: + return "*" in allowed_namespaces or namespace in set(allowed_namespaces) + + def _mask_for_namespace(self, item: Dict[str, Any]) -> Dict[str, Any]: + return { + "id": 
item.get("id"), + "type": "private_event", + "time": item.get("created_at") or item.get("timestamp") or item.get("start_time"), + "importance": item.get("importance", item.get("scene_strength", 0.5)), + "details": "[REDACTED]", + "masked": True, + } + + def _enforce_namespaces_on_results( + self, + items: List[Dict[str, Any]], + allowed_namespaces: List[str], + ) -> List[Dict[str, Any]]: + if "*" in allowed_namespaces: + visible = [] + for item in items: + value = dict(item) + value["masked"] = False + visible.append(value) + return visible + filtered: List[Dict[str, Any]] = [] + for item in items: + namespace = self._normalize_namespace(item.get("namespace")) + if self._is_namespace_allowed(namespace, allowed_namespaces): + value = dict(item) + value["masked"] = bool(value.get("masked", False)) + filtered.append(value) + else: + filtered.append(self._mask_for_namespace(item)) + return filtered + + @staticmethod + def _parse_float_env(name: str, default: float) -> float: + try: + return float(os.environ.get(name, default)) + except Exception: + return float(default) + + @staticmethod + def _parse_int_env(name: str, default: int) -> int: + try: + return int(os.environ.get(name, default)) + except Exception: + return int(default) + + def _passes_auto_merge_guardrails(self, trust_row: Dict[str, Any]) -> bool: + total = int(trust_row.get("total_proposals", 0) or 0) + approved = int(trust_row.get("approved_proposals", 0) or 0) + rejected = int(trust_row.get("rejected_proposals", 0) or 0) + + min_total = self._parse_int_env("ENGRAM_V2_AUTO_MERGE_MIN_TOTAL", 10) + min_approved = self._parse_int_env("ENGRAM_V2_AUTO_MERGE_MIN_APPROVED", 7) + max_reject_rate = self._parse_float_env("ENGRAM_V2_AUTO_MERGE_MAX_REJECT_RATE", 0.2) + + if total < max(1, min_total): + return False + if approved < max(1, min_approved): + return False + + rejection_rate = (rejected / total) if total > 0 else 1.0 + return rejection_rate <= max(0.0, max_reject_rate) + + # 
------------------------------------------------------------------ + # Read path + # ------------------------------------------------------------------ + + def search( + self, + *, + query: str, + user_id: str, + agent_id: Optional[str], + token: Optional[str], + limit: int = 10, + categories: Optional[List[str]] = None, + ) -> Dict[str, Any]: + session = self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=True, + required_capabilities=["search"] if (token or agent_id) else None, + ) + + allowed_scopes = default_allowed_scopes() + if session: + allowed_scopes = session.get("allowed_confidentiality_scopes") + allowed_namespaces = self._resolve_allowed_namespaces( + session=session, + user_id=user_id, + agent_id=agent_id, + capability="read", + ) + + use_dual = feature_enabled("ENGRAM_V2_DUAL_RETRIEVAL", default=True) + if use_dual: + result = self.dual_search.search( + query=query, + user_id=user_id, + agent_id=agent_id, + limit=limit, + categories=categories, + allowed_confidentiality_scopes=allowed_scopes, + allowed_namespaces=allowed_namespaces, + ) + else: + fallback = self.memory.search( + query=query, + user_id=user_id, + agent_id=agent_id, + limit=limit, + categories=categories, + ) + fallback_results = fallback.get("results", fallback) + masked_results = enforce_scope_on_results(fallback_results, allowed_scopes) + namespaced_results = self._enforce_namespaces_on_results(masked_results, allowed_namespaces) + final_results = namespaced_results[:limit] + masked_count = sum(1 for item in final_results if item.get("masked")) + result = { + "results": final_results, + "count": len(final_results), + "context_packet": { + "query": query, + "snippets": [], + "token_usage": {"estimated_tokens": 0, "budget": 0}, + "masking": {"masked_count": masked_count, "total_candidates": len(fallback_results)}, + }, + "scene_hits": [], + } + return result + + def search_scenes( + self, + *, + query: str, + user_id: str, + agent_id: 
Optional[str], + token: Optional[str], + limit: int = 10, + ) -> Dict[str, Any]: + session = self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=True, + required_capabilities=["read_scene"] if (token or agent_id) else None, + ) + allowed_scopes = session.get("allowed_confidentiality_scopes") if session else default_allowed_scopes() + allowed_namespaces = self._resolve_allowed_namespaces( + session=session, + user_id=user_id, + agent_id=agent_id, + capability="read", + ) + scenes = self.episodic_store.search_scenes(user_id=user_id, query=query, limit=limit) + # Scene masking is coarse: mask summaries if no permitted scope. + masked_scenes = [] + for scene in scenes: + scene_namespace = self._normalize_namespace(scene.get("namespace")) + if not self._is_namespace_allowed(scene_namespace, allowed_namespaces): + masked_scenes.append(self._mask_for_namespace(scene)) + continue + if not feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True): + visible_scene = dict(scene) + visible_scene["masked"] = False + masked_scenes.append(visible_scene) + continue + scope = normalize_confidentiality_scope(scene.get("confidentiality_scope") or "work") + if scope in set(allowed_scopes or []): + visible_scene = dict(scene) + visible_scene["masked"] = False + masked_scenes.append(visible_scene) + else: + masked_scenes.append( + { + "id": scene.get("id"), + "type": "private_event", + "time": scene.get("start_time"), + "importance": scene.get("scene_strength", 0.5), + "details": "[REDACTED]", + "masked": True, + } + ) + return {"scenes": masked_scenes, "count": len(masked_scenes)} + + def get_scene( + self, + *, + scene_id: str, + user_id: str, + agent_id: Optional[str], + token: Optional[str], + ) -> Optional[Dict[str, Any]]: + session = self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=True, + required_capabilities=["read_scene"] if (token or agent_id) else None, + ) + + scene = 
self.memory.get_scene(scene_id) + if not scene: + return None + scene = dict(scene) + scene.pop("embedding", None) + + if not feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True): + scene["masked"] = False + return scene + + allowed_scopes = session.get("allowed_confidentiality_scopes") if session else default_allowed_scopes() + allowed_namespaces = self._resolve_allowed_namespaces( + session=session, + user_id=user_id, + agent_id=agent_id, + capability="read", + ) + scene_namespace = self._normalize_namespace(scene.get("namespace")) + if not self._is_namespace_allowed(scene_namespace, allowed_namespaces): + return self._mask_for_namespace(scene) + scope = normalize_confidentiality_scope(scene.get("confidentiality_scope") or "work") + if scope in set(allowed_scopes or []): + scene["masked"] = False + return scene + return { + "id": scene.get("id"), + "type": "private_event", + "time": scene.get("start_time"), + "importance": scene.get("scene_strength", 0.5), + "details": "[REDACTED]", + "masked": True, + } + + # ------------------------------------------------------------------ + # Write path + # ------------------------------------------------------------------ + + def propose_write( + self, + *, + content: str, + user_id: str, + agent_id: Optional[str], + token: Optional[str], + categories: Optional[List[str]] = None, + metadata: Optional[Dict[str, Any]] = None, + scope: str = "work", + namespace: Optional[str] = None, + mode: str = "staging", + infer: bool = False, + source_app: Optional[str] = None, + trusted_direct: bool = False, + source_type: str = "mcp", + source_event_id: Optional[str] = None, + ) -> Dict[str, Any]: + metadata = dict(metadata or {}) + confidentiality_scope = detect_confidentiality_scope( + categories=categories, + metadata=metadata, + content=content, + explicit_scope=scope, + ) + metadata["confidentiality_scope"] = confidentiality_scope + namespace_value = self._normalize_namespace(namespace or metadata.get("namespace")) + 
metadata["namespace"] = namespace_value + + require_for_agent = (mode != "direct" or not trusted_direct) + session = self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=require_for_agent, + required_capabilities=["propose_write"] if (token or require_for_agent) else None, + ) + if feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True): + allowed_write_namespaces = self._resolve_allowed_namespaces( + session=session, + user_id=user_id, + agent_id=agent_id, + capability="write", + ) + if not self._is_namespace_allowed(namespace_value, allowed_write_namespaces): + raise PermissionError(f"Namespace access denied: {namespace_value}") + self.db.ensure_namespace(user_id=user_id, name=namespace_value) + + if mode == "staging" and not feature_enabled("ENGRAM_V2_STAGING_WRITES", default=True): + mode = "direct" + + provenance = build_provenance( + source_type=source_type, + source_app=source_app, + source_event_id=source_event_id, + agent_id=agent_id, + tool="propose_write", + ) + + if mode == "direct": + if not trusted_direct: + raise PermissionError("Direct mode is allowed only for trusted local clients") + return self._apply_direct_write( + content=content, + user_id=user_id, + agent_id=agent_id, + categories=categories, + metadata=metadata, + infer=infer, + provenance=provenance, + source_app=source_app, + ) + + checks = self.invariant_engine.evaluate_add(user_id=user_id, content=content) + status = "PENDING" + if checks.get("conflicts") or checks.get("pii_risk"): + status = "AUTO_STASHED" + + changes = [ + { + "op": "ADD", + "target": "memory_item", + "target_id": None, + "patch": { + "content": content, + "categories": categories or [], + "metadata": metadata, + "infer": infer, + "source_app": source_app, + "confidentiality_scope": confidentiality_scope, + "namespace": namespace_value, + }, + } + ] + preview = { + "summary": content[:140], + "scope": confidentiality_scope, + "namespace": namespace_value, + 
"category_count": len(categories or []), + } + commit = self.staging_store.create_commit( + user_id=user_id, + agent_id=agent_id, + scope=confidentiality_scope, + changes=changes, + checks=checks, + preview=preview, + provenance=provenance, + status=status, + ) + self.db.record_agent_proposal(user_id=user_id, agent_id=agent_id, status=status) + + if checks.get("conflicts"): + for conflict in checks["conflicts"]: + self.staging_store.add_conflict( + user_id=user_id, + conflict_key=conflict["key"], + existing={"value": conflict["existing"]}, + proposed={"value": conflict["proposed"], "source": commit["id"]}, + source_commit_id=commit["id"], + ) + + if status == "AUTO_STASHED": + self.staging_store.mark_auto_stashed(commit["id"]) + + metrics.record_staged_commit(status) + + auto_merged = False + if ( + status == "PENDING" + and agent_id + and feature_enabled("ENGRAM_V2_TRUST_AUTOMERGE", default=True) + and not checks.get("duplicate_of") + and not checks.get("conflicts") + and not checks.get("pii_risk") + ): + threshold = self._parse_float_env("ENGRAM_V2_AUTO_MERGE_TRUST_THRESHOLD", 0.85) + trust = self.db.get_agent_trust(user_id=user_id, agent_id=agent_id) + if float(trust.get("trust_score", 0.0)) >= threshold and self._passes_auto_merge_guardrails(trust): + auto_merged = True + self.approve_commit(commit_id=commit["id"]) + status = "APPROVED" + + return { + "mode": "staging", + "commit_id": commit["id"], + "status": status, + "checks": checks, + "preview": preview, + "auto_merged": auto_merged, + } + + def _apply_direct_write( + self, + *, + content: str, + user_id: str, + agent_id: Optional[str], + categories: Optional[List[str]], + metadata: Dict[str, Any], + infer: bool, + provenance: Dict[str, Any], + source_app: Optional[str], + ) -> Dict[str, Any]: + metadata = dict(metadata) + metadata.update(provenance) + metadata["allow_sensitive"] = True + sharing_scope = str(metadata.get("sharing_scope", "global")).lower() + result = self.memory.add( + messages=content, + 
user_id=user_id, + agent_id=agent_id, + categories=categories, + metadata=metadata, + scope=sharing_scope, + infer=infer, + source_app=source_app, + ) + + for item in result.get("results", []): + memory_id = item.get("id") + if not memory_id: + continue + created = self.memory.db.get_memory(memory_id) + if not created: + continue + created["confidentiality_scope"] = metadata.get("confidentiality_scope", "work") + created["source_type"] = provenance.get("source_type") + created["source_app"] = provenance.get("source_app") + created["source_event_id"] = provenance.get("source_event_id") + created["status"] = "active" + created["namespace"] = self._normalize_namespace(metadata.get("namespace")) + self.memory.db.update_memory( + memory_id, + { + "confidentiality_scope": created["confidentiality_scope"], + "source_type": created["source_type"], + "source_app": created["source_app"], + "source_event_id": created["source_event_id"], + "status": created["status"], + "namespace": created["namespace"], + }, + ) + self.episodic_store.ingest_memory_as_view( + user_id=user_id, + agent_id=agent_id, + memory_id=memory_id, + content=created.get("memory", content), + metadata=created.get("metadata", {}), + timestamp=created.get("created_at"), + ) + self.invariant_engine.upsert_invariants_from_content( + user_id=user_id, + content=created.get("memory", content), + source_memory_id=memory_id, + ) + + return { + "mode": "direct", + "result": result, + } + + def list_pending_commits( + self, + *, + user_id: Optional[str], + status: Optional[str] = None, + limit: int = 100, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["review_commits"], + ) + commits = self.staging_store.list_commits(user_id=user_id, status=status, limit=limit) + return {"commits": commits, "count": 
len(commits)} + + def approve_commit( + self, + *, + commit_id: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + start = time.perf_counter() + commit = self.staging_store.get_commit(commit_id) + if not commit: + return {"error": "Commit not found", "commit_id": commit_id} + if token or agent_id: + self.authenticate_session( + token=token, + user_id=commit.get("user_id"), + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["review_commits"], + ) + + if commit.get("status") == "APPROVED": + return {"status": "APPROVED", "commit_id": commit_id, "applied": []} + if commit.get("status") == "REJECTED": + return {"error": "Commit already rejected", "commit_id": commit_id} + + moved_to_applying = self.db.transition_proposal_commit_status( + commit_id, + from_statuses=["PENDING", "AUTO_STASHED"], + to_status="APPLYING", + ) + if not moved_to_applying: + latest = self.staging_store.get_commit(commit_id) + if latest and latest.get("status") == "APPROVED": + return {"status": "APPROVED", "commit_id": commit_id, "applied": []} + status = latest.get("status") if latest else commit.get("status") + return {"error": f"Commit not approvable from status {status}", "commit_id": commit_id} + + applied: List[Dict[str, Any]] = [] + created_memory_ids: List[str] = [] + try: + for change in commit.get("changes", []): + op = str(change.get("op", "ADD")).upper() + target = change.get("target", "memory_item") + patch = change.get("patch", {}) + + if target == "memory_item" and op == "ADD": + outcome = self._apply_direct_write( + content=patch.get("content", ""), + user_id=commit.get("user_id", "default"), + agent_id=commit.get("agent_id"), + categories=patch.get("categories", []), + metadata=patch.get("metadata", {}), + infer=bool(patch.get("infer", False)), + provenance=commit.get("provenance", {}), + source_app=patch.get("source_app"), + ) + applied.append(outcome) + for row in outcome.get("result", 
{}).get("results", []): + memory_id = row.get("id") + if memory_id: + created_memory_ids.append(memory_id) + elif target == "memory_item" and op == "UPDATE": + memory_id = change.get("target_id") + self.memory.update(memory_id, patch) + applied.append({"op": "UPDATE", "target_id": memory_id}) + elif target == "memory_item" and op == "DELETE": + memory_id = change.get("target_id") + self.memory.delete(memory_id) + applied.append({"op": "DELETE", "target_id": memory_id}) + else: + raise ValueError(f"Unsupported staged change: target={target}, op={op}") + except Exception as exc: + rolled_back = 0 + for memory_id in reversed(created_memory_ids): + try: + self.memory.delete(memory_id) + rolled_back += 1 + except Exception: + continue + + latest = self.staging_store.get_commit(commit_id) or {} + checks = dict(latest.get("checks", {})) + checks["apply_error"] = str(exc) + checks["rollback_deleted"] = rolled_back + self.db.transition_proposal_commit_status( + commit_id, + from_statuses=["APPLYING"], + to_status="PENDING", + updates={"checks": checks}, + ) + return { + "error": "Commit apply failed", + "commit_id": commit_id, + "rolled_back": rolled_back, + "details": str(exc), + } + + finalized = self.db.transition_proposal_commit_status( + commit_id, + from_statuses=["APPLYING"], + to_status="APPROVED", + ) + if not finalized: + return {"error": "Commit approval finalization failed", "commit_id": commit_id} + + self.db.record_agent_commit_outcome( + user_id=commit.get("user_id", "default"), + agent_id=commit.get("agent_id"), + outcome="APPROVED", + ) + latency_ms = (time.perf_counter() - start) * 1000 + metrics.record_commit_approval(latency_ms) + return {"status": "APPROVED", "commit_id": commit_id, "applied": applied} + + def reject_commit( + self, + *, + commit_id: str, + reason: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + commit = self.staging_store.get_commit(commit_id) + if not commit: + return 
{"error": "Commit not found", "commit_id": commit_id} + if token or agent_id: + self.authenticate_session( + token=token, + user_id=commit.get("user_id"), + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["review_commits"], + ) + if commit.get("status") == "REJECTED": + return {"status": "REJECTED", "commit_id": commit_id, "reason": reason} + if commit.get("status") == "APPROVED": + return {"error": "Approved commits cannot be rejected", "commit_id": commit_id} + + checks = dict(commit.get("checks", {})) + if reason: + checks["rejection_reason"] = reason + moved = self.db.transition_proposal_commit_status( + commit_id, + from_statuses=["PENDING", "AUTO_STASHED", "APPLYING"], + to_status="REJECTED", + updates={"checks": checks}, + ) + if not moved: + latest = self.staging_store.get_commit(commit_id) + latest_status = latest.get("status") if latest else commit.get("status") + return {"error": f"Commit not rejectable from status {latest_status}", "commit_id": commit_id} + + self.db.record_agent_commit_outcome( + user_id=commit.get("user_id", "default"), + agent_id=commit.get("agent_id"), + outcome="REJECTED", + ) + metrics.record_commit_rejection() + return {"status": "REJECTED", "commit_id": commit_id, "reason": reason} + + def resolve_conflict( + self, + *, + stash_id: str, + resolution: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + resolution = resolution.upper() + if resolution not in {"UNRESOLVED", "KEEP_EXISTING", "ACCEPT_PROPOSED", "KEEP_BOTH"}: + return {"error": "Invalid resolution", "stash_id": stash_id} + + stash = self.db.get_conflict_stash(stash_id) + if not stash: + return {"error": "Conflict stash not found", "stash_id": stash_id} + if token or agent_id: + self.authenticate_session( + token=token, + user_id=stash.get("user_id"), + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["resolve_conflicts"], + ) + + 
self.staging_store.resolve_conflict(stash_id, resolution) + if resolution == "ACCEPT_PROPOSED": + proposed = stash.get("proposed", {}) or {} + value = proposed.get("value") + key = stash.get("conflict_key") + if value and key: + self.db.upsert_invariant( + user_id=stash.get("user_id", "default"), + invariant_key=key, + invariant_value=str(value), + category="identity", + confidence=0.8, + source_memory_id=None, + ) + + updated = self.db.get_conflict_stash(stash_id) + return {"stash": updated} + + def get_daily_digest( + self, + *, + user_id: str, + date_str: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["read_digest"], + ) + existing = self.db.get_daily_digest(user_id=user_id, digest_date=date_str) + if existing: + payload = existing.get("payload", {}) + return { + "date": date_str, + "user_id": user_id, + "top_conflicts": payload.get("top_conflicts", []), + "top_proposed_consolidations": payload.get("top_proposed_consolidations", []), + "scene_highlights": payload.get("scene_highlights", []), + } + + payload = self._build_daily_digest_payload(user_id=user_id, date_str=date_str) + self.db.upsert_daily_digest(user_id=user_id, digest_date=date_str, payload=payload) + return { + "date": date_str, + "user_id": user_id, + "top_conflicts": payload["top_conflicts"], + "top_proposed_consolidations": payload["top_proposed_consolidations"], + "scene_highlights": payload.get("scene_highlights", []), + } + + def _build_daily_digest_payload(self, *, user_id: str, date_str: str) -> Dict[str, Any]: + conflicts = self.db.list_conflict_stash(user_id=user_id, resolution="UNRESOLVED", limit=20) + pending = self.db.list_proposal_commits(user_id=user_id, status="PENDING", limit=20) + day_start = f"{date_str}T00:00:00" + day_end = f"{date_str}T23:59:59.999999" + scenes = 
self.db.get_scenes( + user_id=user_id, + start_after=day_start, + start_before=day_end, + limit=20, + ) + scene_highlights = [ + { + "scene_id": scene.get("id"), + "summary": scene.get("summary"), + "topic": scene.get("topic"), + "start_time": scene.get("start_time"), + "memory_count": len(scene.get("memory_ids", [])), + } + for scene in scenes[:10] + ] + return { + "top_conflicts": conflicts[:10], + "top_proposed_consolidations": pending[:10], + "scene_highlights": scene_highlights, + } + + def run_sleep_cycle( + self, + *, + user_id: Optional[str] = None, + date_str: Optional[str] = None, + apply_decay: bool = True, + cleanup_stale_refs: bool = True, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["run_sleep_cycle"], + ) + target_date = date_str or (datetime.utcnow() - timedelta(days=1)).date().isoformat() + users = [user_id] if user_id else self.db.list_user_ids() + if not users: + users = ["default"] + + summary: Dict[str, Any] = { + "date": target_date, + "users": {}, + "stale_refs_removed": 0, + } + day_start = f"{target_date}T00:00:00" + day_end = f"{target_date}T23:59:59.999999" + + for uid in users: + user_stats = { + "promoted": 0, + "digests_upserted": 0, + "scenes_considered": 0, + "decay": {"decayed": 0, "forgotten": 0, "promoted": 0}, + } + memories = self.db.get_all_memories(user_id=uid) + day_memories = [m for m in memories if str(m.get("created_at", "")).startswith(target_date)] + + # Ensure CAST views/scenes are available for the day. 
+ for memory in day_memories: + if memory.get("scene_id"): + continue + try: + self.episodic_store.ingest_memory_as_view( + user_id=uid, + agent_id=memory.get("agent_id"), + memory_id=memory.get("id"), + content=memory.get("memory", ""), + metadata=memory.get("metadata", {}), + timestamp=memory.get("created_at"), + ) + except Exception: + # Non-fatal: keep sleep cycle robust. + continue + + for memory in day_memories: + if memory.get("layer") == "lml": + continue + importance = float(memory.get("importance", 0.0) or 0.0) + strength = float(memory.get("strength", 0.0) or 0.0) + if importance >= 0.8 or strength >= 0.85: + if self.db.update_memory(memory["id"], {"layer": "lml"}): + user_stats["promoted"] += 1 + + payload = self._build_daily_digest_payload(user_id=uid, date_str=target_date) + self.db.upsert_daily_digest(user_id=uid, digest_date=target_date, payload=payload) + user_stats["digests_upserted"] += 1 + user_stats["scenes_considered"] = len( + self.db.get_scenes( + user_id=uid, + start_after=day_start, + start_before=day_end, + limit=100, + ) + ) + + if apply_decay: + user_stats["decay"] = self.memory.apply_decay(scope={"user_id": uid}) + summary["users"][uid] = user_stats + + if cleanup_stale_refs: + summary["stale_refs_removed"] = int(self.ref_manager.cleanup_stale_refs()) + return summary + + def get_agent_trust( + self, + *, + user_id: str, + agent_id: str, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + required_capabilities=["read_trust"], + ) + return self.db.get_agent_trust(user_id=user_id, agent_id=agent_id) + + def list_namespaces( + self, + *, + user_id: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + if token or agent_id: + self.authenticate_session( + 
token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["manage_namespaces"], + ) + return self.db.list_namespaces(user_id=user_id) + + def declare_namespace( + self, + *, + user_id: str, + namespace: str, + description: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=agent_id, + require_for_agent=bool(agent_id), + required_capabilities=["manage_namespaces"], + ) + namespace_id = self.db.ensure_namespace(user_id=user_id, name=namespace, description=description) + for item in self.db.list_namespaces(user_id=user_id): + if item.get("id") == namespace_id: + return item + return {"id": namespace_id, "user_id": user_id, "name": namespace} + + def grant_namespace_permission( + self, + *, + user_id: str, + namespace: str, + agent_id: str, + capability: str = "read", + expires_at: Optional[str] = None, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + required_capabilities=["manage_namespaces"], + ) + permission_id = self.db.grant_namespace_permission( + user_id=user_id, + agent_id=agent_id, + namespace=namespace, + capability=capability, + expires_at=expires_at, + ) + return { + "permission_id": permission_id, + "user_id": user_id, + "namespace": namespace, + "agent_id": agent_id, + "capability": capability, + "expires_at": expires_at, + } + + def upsert_agent_policy( + self, + *, + user_id: str, + agent_id: str, + allowed_confidentiality_scopes: Optional[List[str]] = None, + allowed_capabilities: Optional[List[str]] = None, + allowed_namespaces: Optional[List[str]] = None, + token: Optional[str] = None, + requester_agent_id: 
Optional[str] = None, + ) -> Dict[str, Any]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + required_capabilities=["manage_namespaces"], + ) + normalized_scopes = self._normalize_policy_scopes( + allowed_confidentiality_scopes if allowed_confidentiality_scopes is not None else list(CONFIDENTIALITY_SCOPES) + ) + normalized_capabilities = self._normalize_policy_capabilities( + allowed_capabilities if allowed_capabilities is not None else list(DEFAULT_CAPABILITIES) + ) + normalized_namespaces = self._normalize_policy_namespaces( + allowed_namespaces if allowed_namespaces is not None else ["default"] + ) + for namespace in normalized_namespaces: + if namespace == "*": + continue + self.db.ensure_namespace(user_id=user_id, name=namespace) + return self.db.upsert_agent_policy( + user_id=user_id, + agent_id=agent_id, + allowed_confidentiality_scopes=normalized_scopes, + allowed_capabilities=normalized_capabilities, + allowed_namespaces=normalized_namespaces, + ) + + def get_agent_policy( + self, + *, + user_id: str, + agent_id: str, + include_wildcard: bool = True, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Optional[Dict[str, Any]]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + required_capabilities=["manage_namespaces"], + ) + return self.db.get_agent_policy(user_id=user_id, agent_id=agent_id, include_wildcard=include_wildcard) + + def list_agent_policies( + self, + *, + user_id: Optional[str] = None, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + 
required_capabilities=["manage_namespaces"], + ) + return self.db.list_agent_policies(user_id=user_id) + + def delete_agent_policy( + self, + *, + user_id: str, + agent_id: str, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + if token or requester_agent_id: + self.authenticate_session( + token=token, + user_id=user_id, + agent_id=requester_agent_id, + require_for_agent=bool(requester_agent_id), + required_capabilities=["manage_namespaces"], + ) + deleted = self.db.delete_agent_policy(user_id=user_id, agent_id=agent_id) + return {"deleted": bool(deleted), "user_id": user_id, "agent_id": agent_id} diff --git a/engram/core/policy.py b/engram/core/policy.py new file mode 100644 index 0000000..dc9553e --- /dev/null +++ b/engram/core/policy.py @@ -0,0 +1,148 @@ +"""Policy gateway helpers for Engram v2.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional, Set + + +ALL_CONFIDENTIALITY_SCOPES = ["work", "personal", "finance", "health", "private"] +CONFIDENTIALITY_SCOPES = set(ALL_CONFIDENTIALITY_SCOPES) +DEFAULT_CAPABILITIES = [ + "search", + "propose_write", + "read_scene", + "review_commits", + "resolve_conflicts", + "read_digest", + "read_trust", + "manage_namespaces", + "run_sleep_cycle", +] +SENSITIVE_HINTS = { + "finance": {"finance", "bank", "salary", "invoice", "tax", "payment", "credit"}, + "health": {"health", "medical", "doctor", "diagnosis", "therapy", "medication"}, + "private": {"password", "secret", "token", "api_key", "apikey", "private"}, + "personal": {"family", "relationship", "home", "personal"}, +} + + +@dataclass +class PolicyDecision: + allowed: bool + masked: bool = False + reason: Optional[str] = None + + +def feature_enabled(name: str, default: bool = True) -> bool: + value = os.environ.get(name) + if value is None: + return default + return value.strip().lower() in {"1", "true", "yes", "on"} + + +def 
normalize_confidentiality_scope(scope: Optional[str]) -> str:
    """Coerce *scope* to one of CONFIDENTIALITY_SCOPES; unknown/empty -> 'work'."""
    if not scope:
        return "work"
    scope_value = str(scope).strip().lower()
    if scope_value in CONFIDENTIALITY_SCOPES:
        return scope_value
    # Unknown labels deliberately fall back to the least sensitive bucket.
    return "work"


def detect_confidentiality_scope(
    *,
    categories: Optional[Iterable[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    content: Optional[str] = None,
    explicit_scope: Optional[str] = None,
) -> str:
    """Infer a confidentiality scope for a memory.

    Precedence: explicit argument > metadata hint > keyword match against
    SENSITIVE_HINTS > default 'work'.
    """
    if explicit_scope:
        return normalize_confidentiality_scope(explicit_scope)

    metadata = metadata or {}
    meta_scope = metadata.get("confidentiality_scope") or metadata.get("scope_confidentiality")
    if meta_scope:
        return normalize_confidentiality_scope(meta_scope)

    # Tokenize categories (splitting '/' and '_') and raw content into words.
    # NOTE(review): whitespace tokenization means a multi-word hint such as
    # "api_key" only matches when it appears as a single token — confirm intended.
    terms = set()
    for category in categories or []:
        terms.update(str(category).lower().replace("/", " ").replace("_", " ").split())
    if content:
        terms.update(str(content).lower().split())

    # First hint bucket whose word set intersects the tokens wins
    # (dict insertion order: finance, health, private, personal).
    for scope, hints in SENSITIVE_HINTS.items():
        if terms & hints:
            return scope
    return "work"


def token_required_for_agent(agent_id: Optional[str]) -> bool:
    """Agents need a session token whenever the policy gateway is enabled."""
    if not feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True):
        return False
    return bool(agent_id)


def default_allowed_scopes() -> List[str]:
    """Return a fresh copy of every confidentiality scope."""
    return list(ALL_CONFIDENTIALITY_SCOPES)


def is_trusted_local_request(client_host: Optional[str]) -> bool:
    """True when the request originates from the local host (or the test client)."""
    if client_host is None:
        return False
    host = str(client_host).strip().lower()
    return host in {
        "127.0.0.1",
        "::1",
        "::ffff:127.0.0.1",
        "localhost",
        "testclient",
    }


def _build_masked_shape(item: Dict[str, Any]) -> Dict[str, Any]:
    """Reduce *item* to a redacted envelope that leaks only id/scope/time."""
    created_at = item.get("created_at") or item.get("timestamp")
    scope = normalize_confidentiality_scope(item.get("confidentiality_scope"))
    return {
        "id": item.get("id"),
        # 'work' items keep a generic label; other scopes advertise the scope name.
        "type": f"{scope}_event" if scope != "work" else "memory_event",
        "time": created_at,
        "importance": item.get("importance", 0.5),
        "details": "[REDACTED]",
        "masked": True,
    }


def enforce_scope_on_item(
    item:
Dict[str, Any], + allowed_scopes: Optional[Set[str]], +) -> Dict[str, Any]: + if not feature_enabled("ENGRAM_V2_POLICY_GATEWAY", default=True): + visible = dict(item) + visible["masked"] = False + return visible + + if allowed_scopes is None: + visible = dict(item) + visible["masked"] = False + return visible + + if not allowed_scopes: + return _build_masked_shape(item) + + scope = normalize_confidentiality_scope(item.get("confidentiality_scope")) + if scope in allowed_scopes: + item = dict(item) + item["masked"] = False + return item + + return _build_masked_shape(item) + + +def enforce_scope_on_results( + results: List[Dict[str, Any]], + allowed_scopes: Optional[Iterable[str]], +) -> List[Dict[str, Any]]: + scope_set = {normalize_confidentiality_scope(s) for s in (allowed_scopes or [])} + return [enforce_scope_on_item(r, scope_set) for r in results] diff --git a/engram/core/provenance.py b/engram/core/provenance.py new file mode 100644 index 0000000..920e99b --- /dev/null +++ b/engram/core/provenance.py @@ -0,0 +1,40 @@ +"""Provenance helpers for Engram v2.""" + +from __future__ import annotations + +from dataclasses import dataclass, asdict +from datetime import datetime +from typing import Any, Dict, Optional + + +@dataclass +class Provenance: + source_type: str = "mcp" + source_app: Optional[str] = None + source_event_id: Optional[str] = None + agent_id: Optional[str] = None + tool: Optional[str] = None + created_at: str = "" + + def to_dict(self) -> Dict[str, Any]: + data = asdict(self) + if not data["created_at"]: + data["created_at"] = datetime.utcnow().isoformat() + return data + + +def build_provenance( + *, + source_type: str = "mcp", + source_app: Optional[str] = None, + source_event_id: Optional[str] = None, + agent_id: Optional[str] = None, + tool: Optional[str] = None, +) -> Dict[str, Any]: + return Provenance( + source_type=source_type, + source_app=source_app, + source_event_id=source_event_id, + agent_id=agent_id, + tool=tool, + ).to_dict() diff 
--git a/engram/core/scene.py b/engram/core/scene.py index b213cc4..97567cd 100644 --- a/engram/core/scene.py +++ b/engram/core/scene.py @@ -162,6 +162,7 @@ def create_scene( location: Optional[str] = None, participants: Optional[List[str]] = None, embedding: Optional[List[float]] = None, + namespace: str = "default", ) -> Dict[str, Any]: """Create a new scene and add the first memory to it.""" scene_id = str(uuid.uuid4()) @@ -177,6 +178,7 @@ def create_scene( "end_time": None, "embedding": embedding, "strength": 1.0, + "namespace": namespace, } self.db.add_scene(scene_data) self.db.add_scene_memory(scene_id, first_memory_id, position=0) @@ -192,6 +194,7 @@ def add_memory_to_scene( memory_id: str, embedding: Optional[List[float]] = None, timestamp: Optional[str] = None, + namespace: Optional[str] = None, ) -> None: """Append a memory to an existing scene.""" scene = self.db.get_scene(scene_id) @@ -205,6 +208,8 @@ def add_memory_to_scene( updates: Dict[str, Any] = {"memory_ids": memory_ids} if timestamp: updates["end_time"] = timestamp + if namespace: + updates["namespace"] = namespace # Running average of embeddings if embedding and scene.get("embedding"): diff --git a/engram/db/sqlite.py b/engram/db/sqlite.py index f602e8a..3f73f2c 100644 --- a/engram/db/sqlite.py +++ b/engram/db/sqlite.py @@ -160,8 +160,10 @@ def _init_db(self) -> None: ); """ ) - # Migration: add scene_id column to memories if missing - self._migrate_add_column("memories", "scene_id", "TEXT") + # Legacy migration: add scene_id column to memories if missing. + self._migrate_add_column_conn(conn, "memories", "scene_id", "TEXT") + # v2 schema + idempotent migrations. 
+ self._ensure_v2_schema(conn) @contextmanager def _get_connection(self): @@ -173,9 +175,461 @@ def _get_connection(self): finally: conn.close() + def _ensure_v2_schema(self, conn: sqlite3.Connection) -> None: + """Create and migrate Engram v2 schema in-place (idempotent).""" + conn.execute( + """ + CREATE TABLE IF NOT EXISTS schema_migrations ( + version TEXT PRIMARY KEY, + applied_at TEXT DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + + migrations: Dict[str, str] = { + "v2_001": """ + CREATE TABLE IF NOT EXISTS views ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + agent_id TEXT, + timestamp TEXT NOT NULL, + place_type TEXT, + place_value TEXT, + topic_label TEXT, + topic_embedding_ref TEXT, + characters TEXT DEFAULT '[]', + raw_text TEXT, + signals TEXT DEFAULT '{}', + scene_id TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP + ); + CREATE INDEX IF NOT EXISTS idx_views_user_time ON views(user_id, timestamp DESC); + CREATE INDEX IF NOT EXISTS idx_views_scene ON views(scene_id); + """, + "v2_002": """ + CREATE TABLE IF NOT EXISTS proposal_commits ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + agent_id TEXT, + scope TEXT, + status TEXT NOT NULL DEFAULT 'PENDING', + checks TEXT DEFAULT '{}', + preview TEXT DEFAULT '{}', + provenance TEXT DEFAULT '{}', + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP + ); + CREATE INDEX IF NOT EXISTS idx_proposal_commits_user ON proposal_commits(user_id); + CREATE INDEX IF NOT EXISTS idx_proposal_commits_status ON proposal_commits(status); + + CREATE TABLE IF NOT EXISTS proposal_changes ( + id TEXT PRIMARY KEY, + commit_id TEXT NOT NULL, + op TEXT NOT NULL, + target TEXT NOT NULL, + target_id TEXT, + patch TEXT DEFAULT '{}', + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (commit_id) REFERENCES proposal_commits(id) + ); + CREATE INDEX IF NOT EXISTS idx_proposal_changes_commit ON proposal_changes(commit_id); + """, + "v2_003": """ + CREATE TABLE IF NOT EXISTS conflict_stash ( + id 
TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + conflict_key TEXT NOT NULL, + existing TEXT DEFAULT '{}', + proposed TEXT DEFAULT '{}', + resolution TEXT NOT NULL DEFAULT 'UNRESOLVED', + source_commit_id TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + resolved_at TEXT + ); + CREATE INDEX IF NOT EXISTS idx_conflict_stash_user ON conflict_stash(user_id); + CREATE INDEX IF NOT EXISTS idx_conflict_stash_resolution ON conflict_stash(resolution); + """, + "v2_004": """ + CREATE TABLE IF NOT EXISTS sessions ( + id TEXT PRIMARY KEY, + token_hash TEXT NOT NULL UNIQUE, + user_id TEXT NOT NULL, + agent_id TEXT, + allowed_confidentiality_scopes TEXT DEFAULT '[]', + capabilities TEXT DEFAULT '[]', + expires_at TEXT NOT NULL, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + revoked_at TEXT + ); + CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id); + CREATE INDEX IF NOT EXISTS idx_sessions_expires ON sessions(expires_at); + """, + "v2_005": """ + CREATE TABLE IF NOT EXISTS memory_refcounts ( + memory_id TEXT PRIMARY KEY, + strong_count INTEGER DEFAULT 0, + weak_count INTEGER DEFAULT 0, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (memory_id) REFERENCES memories(id) + ); + + CREATE TABLE IF NOT EXISTS memory_subscribers ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + subscriber TEXT NOT NULL, + ref_type TEXT NOT NULL CHECK(ref_type IN ('strong','weak')), + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + UNIQUE(memory_id, subscriber, ref_type), + FOREIGN KEY (memory_id) REFERENCES memories(id) + ); + CREATE INDEX IF NOT EXISTS idx_memory_subscribers_memory ON memory_subscribers(memory_id); + """, + "v2_006": """ + CREATE TABLE IF NOT EXISTS daily_digests ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + digest_date TEXT NOT NULL, + payload TEXT DEFAULT '{}', + generated_at TEXT DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, digest_date) + ); + CREATE INDEX IF NOT EXISTS idx_daily_digests_user_date ON daily_digests(user_id, digest_date); + """, + 
"v2_007": """ + CREATE TABLE IF NOT EXISTS invariants ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + invariant_key TEXT NOT NULL, + invariant_value TEXT NOT NULL, + category TEXT DEFAULT 'identity', + confidence REAL DEFAULT 0.0, + source_memory_id TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, invariant_key) + ); + CREATE INDEX IF NOT EXISTS idx_invariants_user ON invariants(user_id); + """, + "v2_008": """ + CREATE TABLE IF NOT EXISTS agent_trust ( + user_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + total_proposals INTEGER DEFAULT 0, + approved_proposals INTEGER DEFAULT 0, + rejected_proposals INTEGER DEFAULT 0, + auto_stashed_proposals INTEGER DEFAULT 0, + last_proposed_at TEXT, + last_approved_at TEXT, + trust_score REAL DEFAULT 0.0, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (user_id, agent_id) + ); + CREATE INDEX IF NOT EXISTS idx_agent_trust_user ON agent_trust(user_id); + CREATE INDEX IF NOT EXISTS idx_agent_trust_score ON agent_trust(trust_score DESC); + """, + "v2_009": """ + CREATE TABLE IF NOT EXISTS namespaces ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + name TEXT NOT NULL, + description TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, name) + ); + CREATE INDEX IF NOT EXISTS idx_namespaces_user ON namespaces(user_id); + + CREATE TABLE IF NOT EXISTS namespace_permissions ( + id TEXT PRIMARY KEY, + namespace_id TEXT NOT NULL, + user_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + capability TEXT NOT NULL, + granted_at TEXT DEFAULT CURRENT_TIMESTAMP, + expires_at TEXT, + FOREIGN KEY (namespace_id) REFERENCES namespaces(id), + UNIQUE(namespace_id, user_id, agent_id, capability) + ); + CREATE INDEX IF NOT EXISTS idx_ns_permissions_agent ON namespace_permissions(user_id, agent_id); + CREATE INDEX IF NOT EXISTS idx_ns_permissions_namespace ON namespace_permissions(namespace_id); + """, + "v2_010": """ + 
CREATE TABLE IF NOT EXISTS agent_policies ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + agent_id TEXT NOT NULL, + allowed_confidentiality_scopes TEXT DEFAULT '[]', + allowed_capabilities TEXT DEFAULT '[]', + allowed_namespaces TEXT DEFAULT '[]', + created_at TEXT DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, agent_id) + ); + CREATE INDEX IF NOT EXISTS idx_agent_policies_user ON agent_policies(user_id); + CREATE INDEX IF NOT EXISTS idx_agent_policies_agent ON agent_policies(agent_id); + """, + } + + for version, ddl in migrations.items(): + if not self._is_migration_applied(conn, version): + conn.executescript(ddl) + conn.execute( + "INSERT OR IGNORE INTO schema_migrations (version) VALUES (?)", + (version,), + ) + + # v2 columns on existing canonical tables. + self._migrate_add_column_conn(conn, "memories", "confidentiality_scope", "TEXT DEFAULT 'work'") + self._migrate_add_column_conn(conn, "memories", "source_type", "TEXT") + self._migrate_add_column_conn(conn, "memories", "source_app", "TEXT") + self._migrate_add_column_conn(conn, "memories", "source_event_id", "TEXT") + self._migrate_add_column_conn(conn, "memories", "decay_lambda", "REAL DEFAULT 0.12") + self._migrate_add_column_conn(conn, "memories", "status", "TEXT DEFAULT 'active'") + self._migrate_add_column_conn(conn, "memories", "importance", "REAL DEFAULT 0.5") + self._migrate_add_column_conn(conn, "memories", "sensitivity", "TEXT DEFAULT 'normal'") + self._migrate_add_column_conn(conn, "memories", "namespace", "TEXT DEFAULT 'default'") + + self._migrate_add_column_conn(conn, "scenes", "layer", "TEXT DEFAULT 'sml'") + self._migrate_add_column_conn(conn, "scenes", "scene_strength", "REAL DEFAULT 1.0") + self._migrate_add_column_conn(conn, "scenes", "topic_embedding_ref", "TEXT") + self._migrate_add_column_conn(conn, "scenes", "namespace", "TEXT DEFAULT 'default'") + + self._migrate_add_column_conn(conn, "profiles", "role_bias", "TEXT") + 
self._migrate_add_column_conn(conn, "profiles", "profile_summary", "TEXT") + self._migrate_add_column_conn(conn, "sessions", "namespaces", "TEXT DEFAULT '[]'") + self._migrate_add_column_conn(conn, "memory_subscribers", "last_seen_at", "TEXT") + self._migrate_add_column_conn(conn, "memory_subscribers", "expires_at", "TEXT") + + conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_subscribers_expires ON memory_subscribers(expires_at)") + + # Backfills. + conn.execute( + """ + UPDATE memories + SET confidentiality_scope = 'work' + WHERE confidentiality_scope IS NULL OR confidentiality_scope = '' + """ + ) + conn.execute( + """ + UPDATE memories + SET status = 'active' + WHERE status IS NULL OR status = '' + """ + ) + conn.execute( + """ + UPDATE memories + SET namespace = 'default' + WHERE namespace IS NULL OR namespace = '' + """ + ) + conn.execute( + """ + UPDATE scenes + SET namespace = 'default' + WHERE namespace IS NULL OR namespace = '' + """ + ) + conn.execute( + """ + UPDATE sessions + SET namespaces = '[]' + WHERE namespaces IS NULL OR namespaces = '' + """ + ) + conn.execute( + """ + UPDATE memories + SET decay_lambda = 0.12 + WHERE decay_lambda IS NULL + """ + ) + conn.execute( + """ + UPDATE memory_subscribers + SET + last_seen_at = COALESCE(last_seen_at, created_at), + expires_at = COALESCE( + expires_at, + CASE + WHEN ref_type = 'weak' THEN datetime(created_at, '+14 days') + ELSE NULL + END + ) + """ + ) + conn.execute( + """ + UPDATE memories + SET importance = COALESCE( + CASE + WHEN json_extract(metadata, '$.importance') IS NOT NULL + THEN json_extract(metadata, '$.importance') + ELSE importance + END, + 0.5 + ) + """ + ) + conn.execute( + """ + UPDATE memories + SET sensitivity = CASE + WHEN lower(memory) LIKE '%password%' OR lower(memory) LIKE '%api key%' OR lower(memory) LIKE '%token%' + THEN 'secret' + WHEN lower(memory) LIKE '%health%' OR lower(memory) LIKE '%medical%' + THEN 'sensitive' + WHEN lower(memory) LIKE '%bank%' OR lower(memory) LIKE 
'%salary%' OR lower(memory) LIKE '%credit card%' + THEN 'sensitive' + ELSE COALESCE(NULLIF(sensitivity, ''), 'normal') + END + """ + ) + # Keep memory_refcounts bootstrapped for existing memories. + conn.execute( + """ + INSERT OR IGNORE INTO memory_refcounts (memory_id, strong_count, weak_count) + SELECT id, 0, 0 FROM memories + """ + ) + self._seed_default_namespaces(conn) + self._seed_invariants(conn) + + def _seed_default_namespaces(self, conn: sqlite3.Connection) -> None: + users = conn.execute( + """ + SELECT DISTINCT user_id FROM memories + WHERE user_id IS NOT NULL AND user_id != '' + """ + ).fetchall() + now = datetime.utcnow().isoformat() + for row in users: + user_id = row["user_id"] + conn.execute( + """ + INSERT OR IGNORE INTO namespaces (id, user_id, name, description, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?) + """, + (str(uuid.uuid4()), user_id, "default", "Default namespace", now, now), + ) + + def _seed_invariants(self, conn: sqlite3.Connection) -> None: + """Bootstrap protected invariants from self profile and explicit memories.""" + rows = conn.execute( + """ + SELECT id, user_id, memory, metadata + FROM memories + WHERE tombstone = 0 AND ( + lower(memory) LIKE 'name:%' + OR lower(memory) LIKE 'my name is %' + OR lower(memory) LIKE '%@%' + OR json_extract(metadata, '$.policy_explicit') = 1 + OR json_extract(metadata, '$.policy_explicit') = 'true' + ) + ORDER BY created_at DESC + """ + ).fetchall() + + for row in rows: + memory = (row["memory"] or "").strip() + memory_lower = memory.lower() + key = None + value = None + category = "identity" + + if memory_lower.startswith("name:"): + key = "identity.name" + value = memory.split(":", 1)[1].strip() + elif memory_lower.startswith("my name is "): + key = "identity.name" + value = memory[11:].strip() + elif "@" in memory and " " not in memory.strip(): + key = "identity.primary_email" + value = memory.strip() + elif "email" in memory_lower and "@" in memory: + key = "identity.primary_email" + 
value = memory.strip() + + if not key or not value: + continue + + conn.execute( + """ + INSERT INTO invariants ( + id, user_id, invariant_key, invariant_value, category, confidence, source_memory_id + ) VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(user_id, invariant_key) DO UPDATE SET + invariant_value=excluded.invariant_value, + confidence=max(invariants.confidence, excluded.confidence), + source_memory_id=excluded.source_memory_id, + updated_at=CURRENT_TIMESTAMP + """, + ( + str(uuid.uuid4()), + row["user_id"] or "default", + key, + value, + category, + 0.9, + row["id"], + ), + ) + + # Seed from self profile summary/name if available. + profile_rows = conn.execute( + """ + SELECT id, user_id, name + FROM profiles + WHERE profile_type = 'self' + """ + ).fetchall() + for row in profile_rows: + if not row["name"] or row["name"].lower() == "self": + continue + conn.execute( + """ + INSERT INTO invariants ( + id, user_id, invariant_key, invariant_value, category, confidence, source_memory_id + ) VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(user_id, invariant_key) DO NOTHING + """, + ( + str(uuid.uuid4()), + row["user_id"] or "default", + "identity.name", + row["name"], + "identity", + 0.95, + None, + ), + ) + + def _is_migration_applied(self, conn: sqlite3.Connection, version: str) -> bool: + row = conn.execute( + "SELECT 1 FROM schema_migrations WHERE version = ?", + (version,), + ).fetchone() + return row is not None + + def _migrate_add_column_conn( + self, + conn: sqlite3.Connection, + table: str, + column: str, + col_type: str, + ) -> None: + """Add a column using an existing connection, if missing.""" + try: + conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}") + except sqlite3.OperationalError: + pass + def add_memory(self, memory_data: Dict[str, Any]) -> str: memory_id = memory_data.get("id", str(uuid.uuid4())) now = datetime.utcnow().isoformat() + metadata = memory_data.get("metadata", {}) or {} + source_app = memory_data.get("source_app") or memory_data.get("app_id") or metadata.get("source_app") with self._get_connection() as conn: conn.execute( @@ -184,8 +638,10 @@ def add_memory(self, memory_data: Dict[str, Any]) -> str: id, memory, user_id, agent_id, run_id, app_id, metadata, categories, immutable, expiration_date, created_at, updated_at, layer, strength, access_count, - last_accessed, embedding, related_memories, source_memories, tombstone - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + last_accessed, embedding, related_memories, source_memories, tombstone, + confidentiality_scope, namespace, source_type, source_app, source_event_id, decay_lambda, + status, importance, sensitivity + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( memory_id, @@ -208,9 +664,26 @@ def add_memory(self, memory_data: Dict[str, Any]) -> str: json.dumps(memory_data.get("related_memories", [])), json.dumps(memory_data.get("source_memories", [])), 1 if memory_data.get("tombstone", False) else 0, + memory_data.get("confidentiality_scope", "work"), + memory_data.get("namespace", metadata.get("namespace", "default")), + memory_data.get("source_type") or metadata.get("source_type") or "mcp", + source_app, + memory_data.get("source_event_id") or metadata.get("source_event_id"), + memory_data.get("decay_lambda", 0.12), + memory_data.get("status", "active"), + memory_data.get("importance", metadata.get("importance", 0.5)), + memory_data.get("sensitivity", metadata.get("sensitivity", "normal")), ), ) + conn.execute( + """ + INSERT OR IGNORE INTO memory_refcounts (memory_id, strong_count, weak_count) + VALUES (?, 0, 0) + """, + (memory_id,), + ) + self._log_event(memory_id, "ADD", new_value=memory_data.get("memory")) return memory_id @@ -234,6 +707,7 @@ def get_all_memories( run_id: Optional[str] = None, app_id: Optional[str] = None, layer: Optional[str] = None, + namespace: Optional[str] = None, min_strength: float = 0.0, include_tombstoned: bool = False, ) -> List[Dict[str, Any]]: @@ -257,6 +731,9 @@ def get_all_memories( if layer: query += " AND layer = ?" params.append(layer) + if namespace: + query += " AND namespace = ?" 
+ params.append(namespace) query += " ORDER BY strength DESC" @@ -462,11 +939,8 @@ def _category_row_to_dict(self, row: sqlite3.Row) -> Dict[str, Any]: def _migrate_add_column(self, table: str, column: str, col_type: str) -> None: """Add a column to an existing table if it doesn't already exist.""" - try: - with self._get_connection() as conn: - conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}") - except sqlite3.OperationalError: - pass # Column already exists + with self._get_connection() as conn: + self._migrate_add_column_conn(conn, table, column, col_type) # ========================================================================= # Scene methods @@ -480,8 +954,9 @@ def add_scene(self, scene_data: Dict[str, Any]) -> str: INSERT INTO scenes ( id, user_id, title, summary, topic, location, participants, memory_ids, start_time, end_time, - embedding, strength, access_count, tombstone - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + embedding, strength, access_count, tombstone, + layer, scene_strength, topic_embedding_ref, namespace + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( scene_id, @@ -498,6 +973,10 @@ def add_scene(self, scene_data: Dict[str, Any]) -> str: scene_data.get("strength", 1.0), scene_data.get("access_count", 0), 1 if scene_data.get("tombstone", False) else 0, + scene_data.get("layer", "sml"), + scene_data.get("scene_strength", scene_data.get("strength", 1.0)), + scene_data.get("topic_embedding_ref"), + scene_data.get("namespace", "default"), ), ) return scene_id @@ -550,6 +1029,7 @@ def get_scenes( topic: Optional[str] = None, start_after: Optional[str] = None, start_before: Optional[str] = None, + namespace: Optional[str] = None, limit: int = 50, ) -> List[Dict[str, Any]]: query = "SELECT * FROM scenes WHERE tombstone = 0" @@ -566,6 +1046,9 @@ def get_scenes( if start_before: query += " AND start_time <= ?" params.append(start_before) + if namespace: + query += " AND namespace = ?" 
+ params.append(namespace) query += " ORDER BY start_time DESC LIMIT ?" params.append(limit) with self._get_connection() as conn: @@ -618,8 +1101,8 @@ def add_profile(self, profile_data: Dict[str, Any]) -> str: id, user_id, name, profile_type, narrative, facts, preferences, relationships, sentiment, theory_of_mind, aliases, embedding, strength, - created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + created_at, updated_at, role_bias, profile_summary + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( profile_id, @@ -637,6 +1120,8 @@ def add_profile(self, profile_data: Dict[str, Any]) -> str: profile_data.get("strength", 1.0), profile_data.get("created_at", now), profile_data.get("updated_at", now), + profile_data.get("role_bias"), + profile_data.get("profile_summary"), ), ) return profile_id @@ -743,3 +1228,1162 @@ def get_memories_by_category( (f'%"{category_id}"%', min_strength, limit), ).fetchall() return [self._row_to_dict(row) for row in rows] + + # ========================================================================= + # v2 Session methods + # ========================================================================= + + def create_session(self, session_data: Dict[str, Any]) -> str: + session_id = session_data.get("id", str(uuid.uuid4())) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO sessions ( + id, token_hash, user_id, agent_id, + allowed_confidentiality_scopes, capabilities, namespaces, expires_at, revoked_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + session_id, + session_data.get("token_hash"), + session_data.get("user_id"), + session_data.get("agent_id"), + json.dumps(session_data.get("allowed_confidentiality_scopes", [])), + json.dumps(session_data.get("capabilities", [])), + json.dumps(session_data.get("namespaces", [])), + session_data.get("expires_at"), + session_data.get("revoked_at"), + ), + ) + return session_id + + def get_session_by_token_hash(self, token_hash: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM sessions WHERE token_hash = ?", + (token_hash,), + ).fetchone() + if row: + data = dict(row) + data["allowed_confidentiality_scopes"] = self._parse_json_value( + data.get("allowed_confidentiality_scopes"), [] + ) + data["capabilities"] = self._parse_json_value(data.get("capabilities"), []) + data["namespaces"] = self._parse_json_value(data.get("namespaces"), []) + return data + return None + + def revoke_session(self, session_id: str) -> bool: + with self._get_connection() as conn: + conn.execute( + "UPDATE sessions SET revoked_at = ? WHERE id = ?", + (datetime.utcnow().isoformat(), session_id), + ) + return True + + # ========================================================================= + # v2 Staging / proposal methods + # ========================================================================= + + def add_proposal_commit(self, commit_data: Dict[str, Any], changes: Optional[List[Dict[str, Any]]] = None) -> str: + commit_id = commit_data.get("id", str(uuid.uuid4())) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO proposal_commits ( + id, user_id, agent_id, scope, status, checks, preview, provenance, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + commit_id, + commit_data.get("user_id"), + commit_data.get("agent_id"), + commit_data.get("scope"), + commit_data.get("status", "PENDING"), + json.dumps(commit_data.get("checks", {})), + json.dumps(commit_data.get("preview", {})), + json.dumps(commit_data.get("provenance", {})), + commit_data.get("created_at", datetime.utcnow().isoformat()), + commit_data.get("updated_at", datetime.utcnow().isoformat()), + ), + ) + for change in changes or []: + conn.execute( + """ + INSERT INTO proposal_changes ( + id, commit_id, op, target, target_id, patch, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + change.get("id", str(uuid.uuid4())), + commit_id, + change.get("op", "ADD"), + change.get("target", "memory_item"), + change.get("target_id"), + json.dumps(change.get("patch", {})), + change.get("created_at", datetime.utcnow().isoformat()), + ), + ) + return commit_id + + def get_proposal_commit(self, commit_id: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM proposal_commits WHERE id = ?", + (commit_id,), + ).fetchone() + if not row: + return None + data = dict(row) + data["checks"] = self._parse_json_value(data.get("checks"), {}) + data["preview"] = self._parse_json_value(data.get("preview"), {}) + data["provenance"] = self._parse_json_value(data.get("provenance"), {}) + data["changes"] = self.get_proposal_changes(commit_id) + return data + + def list_proposal_commits( + self, + user_id: Optional[str] = None, + status: Optional[str] = None, + limit: int = 100, + ) -> List[Dict[str, Any]]: + query = "SELECT * FROM proposal_commits WHERE 1=1" + params: List[Any] = [] + if user_id: + query += " AND user_id = ?" + params.append(user_id) + if status: + query += " AND status = ?" + params.append(status) + query += " ORDER BY created_at DESC LIMIT ?" 
+ params.append(limit) + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + commits: List[Dict[str, Any]] = [] + for row in rows: + data = dict(row) + data["checks"] = self._parse_json_value(data.get("checks"), {}) + data["preview"] = self._parse_json_value(data.get("preview"), {}) + data["provenance"] = self._parse_json_value(data.get("provenance"), {}) + commits.append(data) + return commits + + def get_proposal_changes(self, commit_id: str) -> List[Dict[str, Any]]: + with self._get_connection() as conn: + rows = conn.execute( + "SELECT * FROM proposal_changes WHERE commit_id = ? ORDER BY created_at ASC", + (commit_id,), + ).fetchall() + changes = [dict(row) for row in rows] + for change in changes: + change["patch"] = self._parse_json_value(change.get("patch"), {}) + return changes + + def update_proposal_commit(self, commit_id: str, updates: Dict[str, Any]) -> bool: + set_clauses = [] + params: List[Any] = [] + for key, value in updates.items(): + if key in {"checks", "preview", "provenance"}: + value = json.dumps(value) + set_clauses.append(f"{key} = ?") + params.append(value) + set_clauses.append("updated_at = ?") + params.append(datetime.utcnow().isoformat()) + params.append(commit_id) + with self._get_connection() as conn: + conn.execute( + f"UPDATE proposal_commits SET {', '.join(set_clauses)} WHERE id = ?", + params, + ) + return True + + def transition_proposal_commit_status( + self, + commit_id: str, + *, + from_statuses: Iterable[str], + to_status: str, + updates: Optional[Dict[str, Any]] = None, + ) -> bool: + normalized_from = [str(status or "").upper() for status in from_statuses if str(status or "").strip()] + if not normalized_from: + return False + + set_clauses = ["status = ?", "updated_at = ?"] + params: List[Any] = [str(to_status or "").upper(), datetime.utcnow().isoformat()] + for key, value in (updates or {}).items(): + if key in {"checks", "preview", "provenance"}: + value = json.dumps(value) + 
set_clauses.append(f"{key} = ?") + params.append(value) + + placeholders = ", ".join("?" for _ in normalized_from) + params.append(commit_id) + params.extend(normalized_from) + + with self._get_connection() as conn: + cursor = conn.execute( + f""" + UPDATE proposal_commits + SET {', '.join(set_clauses)} + WHERE id = ? AND status IN ({placeholders}) + """, + params, + ) + return cursor.rowcount > 0 + + # ========================================================================= + # v2 Conflict stash methods + # ========================================================================= + + def add_conflict_stash(self, stash_data: Dict[str, Any]) -> str: + stash_id = stash_data.get("id", str(uuid.uuid4())) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO conflict_stash ( + id, user_id, conflict_key, existing, proposed, resolution, source_commit_id, created_at, resolved_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + stash_id, + stash_data.get("user_id"), + stash_data.get("conflict_key"), + json.dumps(stash_data.get("existing", {})), + json.dumps(stash_data.get("proposed", {})), + stash_data.get("resolution", "UNRESOLVED"), + stash_data.get("source_commit_id"), + stash_data.get("created_at", datetime.utcnow().isoformat()), + stash_data.get("resolved_at"), + ), + ) + return stash_id + + def get_conflict_stash(self, stash_id: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM conflict_stash WHERE id = ?", + (stash_id,), + ).fetchone() + if not row: + return None + data = dict(row) + data["existing"] = self._parse_json_value(data.get("existing"), {}) + data["proposed"] = self._parse_json_value(data.get("proposed"), {}) + return data + + def list_conflict_stash( + self, + user_id: Optional[str] = None, + resolution: Optional[str] = None, + limit: int = 100, + ) -> List[Dict[str, Any]]: + query = "SELECT * FROM conflict_stash WHERE 1=1" + params: List[Any] = [] + if user_id: + query 
+= " AND user_id = ?" + params.append(user_id) + if resolution: + query += " AND resolution = ?" + params.append(resolution) + query += " ORDER BY created_at DESC LIMIT ?" + params.append(limit) + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + results: List[Dict[str, Any]] = [] + for row in rows: + data = dict(row) + data["existing"] = self._parse_json_value(data.get("existing"), {}) + data["proposed"] = self._parse_json_value(data.get("proposed"), {}) + results.append(data) + return results + + def resolve_conflict_stash(self, stash_id: str, resolution: str) -> bool: + with self._get_connection() as conn: + conn.execute( + """ + UPDATE conflict_stash + SET resolution = ?, resolved_at = ? + WHERE id = ? + """, + (resolution, datetime.utcnow().isoformat(), stash_id), + ) + return True + + # ========================================================================= + # v2 View methods + # ========================================================================= + + def add_view(self, view_data: Dict[str, Any]) -> str: + view_id = view_data.get("id", str(uuid.uuid4())) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO views ( + id, user_id, agent_id, timestamp, place_type, place_value, + topic_label, topic_embedding_ref, characters, raw_text, + signals, scene_id, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + view_id, + view_data.get("user_id"), + view_data.get("agent_id"), + view_data.get("timestamp", datetime.utcnow().isoformat()), + view_data.get("place_type"), + view_data.get("place_value"), + view_data.get("topic_label"), + view_data.get("topic_embedding_ref"), + json.dumps(view_data.get("characters", [])), + view_data.get("raw_text"), + json.dumps(view_data.get("signals", {})), + view_data.get("scene_id"), + view_data.get("created_at", datetime.utcnow().isoformat()), + ), + ) + return view_id + + def get_views( + self, + user_id: Optional[str] = None, + scene_id: Optional[str] = None, + limit: int = 200, + ) -> List[Dict[str, Any]]: + query = "SELECT * FROM views WHERE 1=1" + params: List[Any] = [] + if user_id: + query += " AND user_id = ?" + params.append(user_id) + if scene_id: + query += " AND scene_id = ?" + params.append(scene_id) + query += " ORDER BY timestamp DESC LIMIT ?" + params.append(limit) + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + views = [dict(row) for row in rows] + for view in views: + view["characters"] = self._parse_json_value(view.get("characters"), []) + view["signals"] = self._parse_json_value(view.get("signals"), {}) + return views + + # ========================================================================= + # v2 Refcount methods + # ========================================================================= + + def get_memory_refcount(self, memory_id: str) -> Dict[str, Any]: + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM memory_refcounts WHERE memory_id = ?", + (memory_id,), + ).fetchone() + if not row: + return {"memory_id": memory_id, "strong": 0, "weak": 0, "subscribers": []} + subscribers = self.list_memory_subscribers(memory_id) + return { + "memory_id": memory_id, + "strong": int(row["strong_count"]), + "weak": int(row["weak_count"]), + "subscribers": subscribers, + } + + def adjust_memory_refcount(self, memory_id: str, strong_delta: int = 0, 
weak_delta: int = 0) -> Dict[str, Any]: + with self._get_connection() as conn: + conn.execute( + """ + INSERT OR IGNORE INTO memory_refcounts (memory_id, strong_count, weak_count) + VALUES (?, 0, 0) + """, + (memory_id,), + ) + conn.execute( + """ + UPDATE memory_refcounts + SET + strong_count = CASE WHEN strong_count + ? < 0 THEN 0 ELSE strong_count + ? END, + weak_count = CASE WHEN weak_count + ? < 0 THEN 0 ELSE weak_count + ? END, + updated_at = ? + WHERE memory_id = ? + """, + ( + strong_delta, + strong_delta, + weak_delta, + weak_delta, + datetime.utcnow().isoformat(), + memory_id, + ), + ) + return self.get_memory_refcount(memory_id) + + def add_memory_subscriber( + self, + memory_id: str, + subscriber: str, + ref_type: str = "weak", + ttl_hours: Optional[int] = None, + ) -> None: + now = datetime.utcnow().isoformat() + expires_at = None + if ttl_hours is not None: + try: + ttl_value = int(ttl_hours) + except Exception: + ttl_value = 0 + if ttl_value > 0: + expires_at = datetime.utcfromtimestamp( + datetime.utcnow().timestamp() + ttl_value * 3600 + ).isoformat() + elif ttl_value < 0: + expires_at = datetime.utcfromtimestamp( + datetime.utcnow().timestamp() + ttl_value * 3600 + ).isoformat() + elif ref_type == "weak": + expires_at = datetime.utcfromtimestamp( + datetime.utcnow().timestamp() + 14 * 24 * 3600 + ).isoformat() + + with self._get_connection() as conn: + existing = conn.execute( + """ + SELECT 1 FROM memory_subscribers + WHERE memory_id = ? AND subscriber = ? AND ref_type = ? + """, + (memory_id, subscriber, ref_type), + ).fetchone() + conn.execute( + """ + INSERT INTO memory_subscribers (id, memory_id, subscriber, ref_type, created_at, last_seen_at, expires_at) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(memory_id, subscriber, ref_type) DO UPDATE SET + last_seen_at=excluded.last_seen_at, + expires_at=excluded.expires_at + """, + ( + str(uuid.uuid4()), + memory_id, + subscriber, + ref_type, + now, + now, + expires_at, + ), + ) + if existing is None: + self.adjust_memory_refcount( + memory_id, + strong_delta=1 if ref_type == "strong" else 0, + weak_delta=1 if ref_type == "weak" else 0, + ) + + def remove_memory_subscriber(self, memory_id: str, subscriber: str, ref_type: str = "weak") -> None: + with self._get_connection() as conn: + cursor = conn.execute( + """ + DELETE FROM memory_subscribers + WHERE memory_id = ? AND subscriber = ? AND ref_type = ? + """, + (memory_id, subscriber, ref_type), + ) + if cursor.rowcount > 0: + self.adjust_memory_refcount( + memory_id, + strong_delta=-1 if ref_type == "strong" else 0, + weak_delta=-1 if ref_type == "weak" else 0, + ) + + def list_memory_subscribers(self, memory_id: str) -> List[str]: + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT subscriber, ref_type + FROM memory_subscribers + WHERE memory_id = ? + ORDER BY created_at ASC + """, + (memory_id,), + ).fetchall() + return [f"{row['subscriber']}:{row['ref_type']}" for row in rows] + + def cleanup_stale_memory_subscribers(self, now_iso: Optional[str] = None) -> int: + now_iso = now_iso or datetime.utcnow().isoformat() + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT memory_id, subscriber, ref_type + FROM memory_subscribers + WHERE expires_at IS NOT NULL AND expires_at <= ? + """, + (now_iso,), + ).fetchall() + if not rows: + return 0 + removed = 0 + for row in rows: + cursor = conn.execute( + """ + DELETE FROM memory_subscribers + WHERE memory_id = ? AND subscriber = ? AND ref_type = ? + """, + (row["memory_id"], row["subscriber"], row["ref_type"]), + ) + if cursor.rowcount <= 0: + continue + removed += cursor.rowcount + conn.execute( + """ + UPDATE memory_refcounts + SET + strong_count = CASE + WHEN ? 
= 'strong' THEN CASE WHEN strong_count - 1 < 0 THEN 0 ELSE strong_count - 1 END + ELSE strong_count + END, + weak_count = CASE + WHEN ? = 'weak' THEN CASE WHEN weak_count - 1 < 0 THEN 0 ELSE weak_count - 1 END + ELSE weak_count + END, + updated_at = ? + WHERE memory_id = ? + """, + (row["ref_type"], row["ref_type"], now_iso, row["memory_id"]), + ) + return removed + + # ========================================================================= + # v2 Daily digest methods + # ========================================================================= + + def upsert_daily_digest(self, user_id: str, digest_date: str, payload: Dict[str, Any]) -> str: + digest_id = str(uuid.uuid4()) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO daily_digests (id, user_id, digest_date, payload, generated_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(user_id, digest_date) DO UPDATE SET + payload=excluded.payload, + generated_at=excluded.generated_at + """, + ( + digest_id, + user_id, + digest_date, + json.dumps(payload), + datetime.utcnow().isoformat(), + ), + ) + return digest_id + + def get_daily_digest(self, user_id: str, digest_date: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + """ + SELECT * FROM daily_digests + WHERE user_id = ? AND digest_date = ? + """, + (user_id, digest_date), + ).fetchone() + if not row: + return None + data = dict(row) + data["payload"] = self._parse_json_value(data.get("payload"), {}) + return data + + # ========================================================================= + # v2 Agent trust methods + # ========================================================================= + + def get_agent_trust(self, user_id: str, agent_id: str) -> Dict[str, Any]: + with self._get_connection() as conn: + row = conn.execute( + """ + SELECT * FROM agent_trust + WHERE user_id = ? AND agent_id = ? 
+ """, + (user_id, agent_id), + ).fetchone() + if row: + return dict(row) + return { + "user_id": user_id, + "agent_id": agent_id, + "total_proposals": 0, + "approved_proposals": 0, + "rejected_proposals": 0, + "auto_stashed_proposals": 0, + "last_proposed_at": None, + "last_approved_at": None, + "trust_score": 0.0, + } + + @staticmethod + def _compute_trust_score( + *, + total_proposals: int, + approved_proposals: int, + last_approved_at: Optional[str], + ) -> float: + approval_rate = approved_proposals / total_proposals if total_proposals > 0 else 0.0 + recency_score = 0.0 + if last_approved_at: + try: + approved_dt = datetime.fromisoformat(last_approved_at) + days_since = max( + 0.0, + (datetime.utcnow() - approved_dt).total_seconds() / 86400.0, + ) + recency_score = max(0.0, 1.0 - min(days_since, 30.0) / 30.0) + except Exception: + recency_score = 0.0 + return round((approval_rate * 0.7) + (recency_score * 0.3), 4) + + def _upsert_agent_trust_row( + self, + *, + user_id: str, + agent_id: str, + total_proposals: int, + approved_proposals: int, + rejected_proposals: int, + auto_stashed_proposals: int, + last_proposed_at: Optional[str], + last_approved_at: Optional[str], + ) -> Dict[str, Any]: + trust_score = self._compute_trust_score( + total_proposals=total_proposals, + approved_proposals=approved_proposals, + last_approved_at=last_approved_at, + ) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO agent_trust ( + user_id, agent_id, total_proposals, approved_proposals, rejected_proposals, + auto_stashed_proposals, last_proposed_at, last_approved_at, trust_score, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(user_id, agent_id) DO UPDATE SET + total_proposals=excluded.total_proposals, + approved_proposals=excluded.approved_proposals, + rejected_proposals=excluded.rejected_proposals, + auto_stashed_proposals=excluded.auto_stashed_proposals, + last_proposed_at=excluded.last_proposed_at, + last_approved_at=excluded.last_approved_at, + trust_score=excluded.trust_score, + updated_at=excluded.updated_at + """, + ( + user_id, + agent_id, + int(total_proposals), + int(approved_proposals), + int(rejected_proposals), + int(auto_stashed_proposals), + last_proposed_at, + last_approved_at, + trust_score, + datetime.utcnow().isoformat(), + ), + ) + return self.get_agent_trust(user_id=user_id, agent_id=agent_id) + + def record_agent_proposal(self, user_id: str, agent_id: Optional[str], status: str) -> Dict[str, Any]: + if not user_id or not agent_id: + return {} + current = self.get_agent_trust(user_id=user_id, agent_id=agent_id) + now_iso = datetime.utcnow().isoformat() + auto_stashed = int(current.get("auto_stashed_proposals", 0)) + if (status or "").upper() == "AUTO_STASHED": + auto_stashed += 1 + return self._upsert_agent_trust_row( + user_id=user_id, + agent_id=agent_id, + total_proposals=int(current.get("total_proposals", 0)) + 1, + approved_proposals=int(current.get("approved_proposals", 0)), + rejected_proposals=int(current.get("rejected_proposals", 0)), + auto_stashed_proposals=auto_stashed, + last_proposed_at=now_iso, + last_approved_at=current.get("last_approved_at"), + ) + + def record_agent_commit_outcome(self, user_id: str, agent_id: Optional[str], outcome: str) -> Dict[str, Any]: + if not user_id or not agent_id: + return {} + current = self.get_agent_trust(user_id=user_id, agent_id=agent_id) + outcome_upper = (outcome or "").upper() + approved = int(current.get("approved_proposals", 0)) + rejected = int(current.get("rejected_proposals", 0)) + auto_stashed = int(current.get("auto_stashed_proposals", 0)) + last_approved_at = current.get("last_approved_at") 
+ now_iso = datetime.utcnow().isoformat() + if outcome_upper == "APPROVED": + approved += 1 + last_approved_at = now_iso + elif outcome_upper == "REJECTED": + rejected += 1 + elif outcome_upper == "AUTO_STASHED": + auto_stashed += 1 + return self._upsert_agent_trust_row( + user_id=user_id, + agent_id=agent_id, + total_proposals=int(current.get("total_proposals", 0)), + approved_proposals=approved, + rejected_proposals=rejected, + auto_stashed_proposals=auto_stashed, + last_proposed_at=current.get("last_proposed_at"), + last_approved_at=last_approved_at, + ) + + # ========================================================================= + # v2 Namespace methods + # ========================================================================= + + def ensure_namespace(self, user_id: str, name: str, description: Optional[str] = None) -> str: + ns_name = (name or "default").strip() or "default" + with self._get_connection() as conn: + row = conn.execute( + """ + SELECT id FROM namespaces WHERE user_id = ? AND name = ? + """, + (user_id, ns_name), + ).fetchone() + if row: + namespace_id = row["id"] + conn.execute( + """ + UPDATE namespaces + SET description = COALESCE(?, description), updated_at = ? + WHERE id = ? + """, + (description, datetime.utcnow().isoformat(), namespace_id), + ) + return namespace_id + namespace_id = str(uuid.uuid4()) + conn.execute( + """ + INSERT INTO namespaces (id, user_id, name, description, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?) + """, + ( + namespace_id, + user_id, + ns_name, + description, + datetime.utcnow().isoformat(), + datetime.utcnow().isoformat(), + ), + ) + return namespace_id + + def list_namespaces(self, user_id: Optional[str] = None) -> List[Dict[str, Any]]: + query = "SELECT * FROM namespaces WHERE 1=1" + params: List[Any] = [] + if user_id: + query += " AND user_id = ?" 
+ params.append(user_id) + query += " ORDER BY created_at ASC" + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + return [dict(row) for row in rows] + + def grant_namespace_permission( + self, + *, + user_id: str, + agent_id: str, + namespace: str, + capability: str = "read", + expires_at: Optional[str] = None, + ) -> str: + namespace_id = self.ensure_namespace(user_id=user_id, name=namespace) + permission_id = str(uuid.uuid4()) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO namespace_permissions ( + id, namespace_id, user_id, agent_id, capability, granted_at, expires_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(namespace_id, user_id, agent_id, capability) DO UPDATE SET + expires_at=excluded.expires_at, + granted_at=excluded.granted_at + """, + ( + permission_id, + namespace_id, + user_id, + agent_id, + capability, + datetime.utcnow().isoformat(), + expires_at, + ), + ) + return permission_id + + def list_namespace_permissions( + self, + *, + user_id: Optional[str] = None, + agent_id: Optional[str] = None, + namespace: Optional[str] = None, + include_expired: bool = False, + ) -> List[Dict[str, Any]]: + query = """ + SELECT p.*, n.name AS namespace_name + FROM namespace_permissions p + JOIN namespaces n ON n.id = p.namespace_id + WHERE 1=1 + """ + params: List[Any] = [] + if user_id: + query += " AND p.user_id = ?" + params.append(user_id) + if agent_id: + query += " AND p.agent_id = ?" + params.append(agent_id) + if namespace: + query += " AND n.name = ?" 
+ params.append(namespace) + if not include_expired: + query += " AND (p.expires_at IS NULL OR p.expires_at > ?)" + params.append(datetime.utcnow().isoformat()) + query += " ORDER BY p.granted_at DESC" + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + return [dict(row) for row in rows] + + def get_agent_allowed_namespaces(self, user_id: str, agent_id: Optional[str], capability: str = "read") -> List[str]: + # User-local or missing agent context falls back to default namespace. + if not agent_id: + return ["default"] + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT n.name AS namespace_name + FROM namespace_permissions p + JOIN namespaces n ON n.id = p.namespace_id + WHERE p.user_id = ? + AND p.agent_id IN (?, '*') + AND p.capability IN (?, '*') + AND (p.expires_at IS NULL OR p.expires_at > ?) + """, + (user_id, agent_id, capability, datetime.utcnow().isoformat()), + ).fetchall() + namespaces = [str(row["namespace_name"]) for row in rows if row["namespace_name"]] + if "default" not in namespaces: + namespaces.append("default") + return sorted(set(namespaces)) + + # ========================================================================= + # v2 Agent policy methods + # ========================================================================= + + def upsert_agent_policy( + self, + *, + user_id: str, + agent_id: str, + allowed_confidentiality_scopes: Optional[List[str]] = None, + allowed_capabilities: Optional[List[str]] = None, + allowed_namespaces: Optional[List[str]] = None, + ) -> Dict[str, Any]: + policy_id = str(uuid.uuid4()) + scopes = sorted({ + str(scope).strip().lower() + for scope in (allowed_confidentiality_scopes or []) + if str(scope).strip() + }) + capabilities = sorted({ + str(capability).strip().lower() + for capability in (allowed_capabilities or []) + if str(capability).strip() + }) + namespaces = sorted({ + str(namespace).strip() + for namespace in (allowed_namespaces or []) + if 
str(namespace).strip() + }) + now_iso = datetime.utcnow().isoformat() + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO agent_policies ( + id, + user_id, + agent_id, + allowed_confidentiality_scopes, + allowed_capabilities, + allowed_namespaces, + created_at, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(user_id, agent_id) DO UPDATE SET + allowed_confidentiality_scopes=excluded.allowed_confidentiality_scopes, + allowed_capabilities=excluded.allowed_capabilities, + allowed_namespaces=excluded.allowed_namespaces, + updated_at=excluded.updated_at + """, + ( + policy_id, + user_id, + agent_id, + json.dumps(scopes), + json.dumps(capabilities), + json.dumps(namespaces), + now_iso, + now_iso, + ), + ) + policy = self.get_agent_policy(user_id=user_id, agent_id=agent_id, include_wildcard=False) + return policy or { + "id": policy_id, + "user_id": user_id, + "agent_id": agent_id, + "allowed_confidentiality_scopes": scopes, + "allowed_capabilities": capabilities, + "allowed_namespaces": namespaces, + } + + def get_agent_policy( + self, + *, + user_id: str, + agent_id: str, + include_wildcard: bool = True, + ) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + if include_wildcard: + row = conn.execute( + """ + SELECT *, + CASE WHEN agent_id = ? THEN 0 ELSE 1 END AS _priority + FROM agent_policies + WHERE user_id = ? + AND agent_id IN (?, '*') + ORDER BY _priority ASC + LIMIT 1 + """, + (agent_id, user_id, agent_id), + ).fetchone() + else: + row = conn.execute( + """ + SELECT * FROM agent_policies + WHERE user_id = ? AND agent_id = ? 
+ LIMIT 1 + """, + (user_id, agent_id), + ).fetchone() + + if not row: + return None + + data = dict(row) + data["allowed_confidentiality_scopes"] = self._parse_json_value(data.get("allowed_confidentiality_scopes"), []) + data["allowed_capabilities"] = self._parse_json_value(data.get("allowed_capabilities"), []) + data["allowed_namespaces"] = self._parse_json_value(data.get("allowed_namespaces"), []) + if include_wildcard: + data["policy_scope"] = "exact" if data.get("agent_id") == agent_id else "wildcard" + data.pop("_priority", None) + return data + + def list_agent_policies(self, *, user_id: Optional[str] = None) -> List[Dict[str, Any]]: + query = "SELECT * FROM agent_policies WHERE 1=1" + params: List[Any] = [] + if user_id: + query += " AND user_id = ?" + params.append(user_id) + query += " ORDER BY user_id ASC, agent_id ASC" + with self._get_connection() as conn: + rows = conn.execute(query, params).fetchall() + + policies: List[Dict[str, Any]] = [] + for row in rows: + data = dict(row) + data["allowed_confidentiality_scopes"] = self._parse_json_value(data.get("allowed_confidentiality_scopes"), []) + data["allowed_capabilities"] = self._parse_json_value(data.get("allowed_capabilities"), []) + data["allowed_namespaces"] = self._parse_json_value(data.get("allowed_namespaces"), []) + policies.append(data) + return policies + + def delete_agent_policy(self, *, user_id: str, agent_id: str) -> bool: + with self._get_connection() as conn: + cursor = conn.execute( + """ + DELETE FROM agent_policies + WHERE user_id = ? AND agent_id = ? 
+ """, + (user_id, agent_id), + ) + return cursor.rowcount > 0 + + def list_user_ids(self) -> List[str]: + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT DISTINCT user_id FROM ( + SELECT user_id FROM memories + UNION ALL + SELECT user_id FROM sessions + UNION ALL + SELECT user_id FROM proposal_commits + ) + WHERE user_id IS NOT NULL AND user_id != '' + ORDER BY user_id + """ + ).fetchall() + return [str(row["user_id"]) for row in rows if row["user_id"]] + + # ========================================================================= + # v2 Invariant methods + # ========================================================================= + + def upsert_invariant( + self, + user_id: str, + invariant_key: str, + invariant_value: str, + category: str = "identity", + confidence: float = 0.7, + source_memory_id: Optional[str] = None, + ) -> str: + invariant_id = str(uuid.uuid4()) + with self._get_connection() as conn: + conn.execute( + """ + INSERT INTO invariants ( + id, user_id, invariant_key, invariant_value, category, confidence, source_memory_id + ) VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(user_id, invariant_key) DO UPDATE SET + invariant_value=excluded.invariant_value, + category=excluded.category, + confidence=excluded.confidence, + source_memory_id=excluded.source_memory_id, + updated_at=CURRENT_TIMESTAMP + """, + ( + invariant_id, + user_id, + invariant_key, + invariant_value, + category, + confidence, + source_memory_id, + ), + ) + return invariant_id + + def get_invariant(self, user_id: str, invariant_key: str) -> Optional[Dict[str, Any]]: + with self._get_connection() as conn: + row = conn.execute( + """ + SELECT * FROM invariants + WHERE user_id = ? AND invariant_key = ? 
+ """, + (user_id, invariant_key), + ).fetchone() + return dict(row) if row else None + + def list_invariants(self, user_id: str) -> List[Dict[str, Any]]: + with self._get_connection() as conn: + rows = conn.execute( + """ + SELECT * FROM invariants + WHERE user_id = ? + ORDER BY confidence DESC, updated_at DESC + """, + (user_id,), + ).fetchall() + return [dict(row) for row in rows] + + # ========================================================================= + # Dashboard / Visualization methods + # ========================================================================= + + def get_constellation_data(self, user_id: Optional[str] = None, limit: int = 200) -> Dict[str, Any]: + """Build graph data for the constellation visualizer.""" + with self._get_connection() as conn: + # Nodes: memories + mem_query = "SELECT id, memory, strength, layer, categories, created_at FROM memories WHERE tombstone = 0" + params: List[Any] = [] + if user_id: + mem_query += " AND user_id = ?" + params.append(user_id) + mem_query += " ORDER BY strength DESC LIMIT ?" + params.append(limit) + mem_rows = conn.execute(mem_query, params).fetchall() + + nodes = [] + node_ids = set() + for row in mem_rows: + cats = row["categories"] + if cats: + try: + cats = json.loads(cats) + except Exception: + cats = [] + else: + cats = [] + nodes.append({ + "id": row["id"], + "memory": (row["memory"] or "")[:120], + "strength": row["strength"], + "layer": row["layer"], + "categories": cats, + "created_at": row["created_at"], + }) + node_ids.add(row["id"]) + + # Edges from scene_memories (memories sharing a scene) + edges: List[Dict[str, Any]] = [] + if node_ids: + placeholders = ",".join("?" 
for _ in node_ids) + scene_rows = conn.execute( + f""" + SELECT a.memory_id AS source, b.memory_id AS target, a.scene_id + FROM scene_memories a + JOIN scene_memories b ON a.scene_id = b.scene_id AND a.memory_id < b.memory_id + WHERE a.memory_id IN ({placeholders}) AND b.memory_id IN ({placeholders}) + """, + list(node_ids) + list(node_ids), + ).fetchall() + for row in scene_rows: + edges.append({"source": row["source"], "target": row["target"], "type": "scene"}) + + # Edges from profile_memories (memories sharing a profile) + profile_rows = conn.execute( + f""" + SELECT a.memory_id AS source, b.memory_id AS target, a.profile_id + FROM profile_memories a + JOIN profile_memories b ON a.profile_id = b.profile_id AND a.memory_id < b.memory_id + WHERE a.memory_id IN ({placeholders}) AND b.memory_id IN ({placeholders}) + """, + list(node_ids) + list(node_ids), + ).fetchall() + for row in profile_rows: + edges.append({"source": row["source"], "target": row["target"], "type": "profile"}) + + return {"nodes": nodes, "edges": edges} + + def get_decay_log_entries(self, limit: int = 20) -> List[Dict[str, Any]]: + """Return recent decay log entries for the dashboard sparkline.""" + with self._get_connection() as conn: + rows = conn.execute( + "SELECT * FROM decay_log ORDER BY run_at DESC LIMIT ?", + (limit,), + ).fetchall() + return [dict(row) for row in rows] + + # ========================================================================= + # Utilities + # ========================================================================= + + @staticmethod + def _parse_json_value(value: Any, default: Any) -> Any: + if value is None: + return default + if isinstance(value, (dict, list)): + return value + try: + return json.loads(value) + except Exception: + return default diff --git a/engram/decay/__init__.py b/engram/decay/__init__.py new file mode 100644 index 0000000..3fa13d7 --- /dev/null +++ b/engram/decay/__init__.py @@ -0,0 +1,5 @@ +"""Decay helpers for Engram v2.""" + +from 
class RefCountManager:
    """Reference-aware retention helpers.

    Wraps a DB layer that exposes ``add_memory_subscriber``,
    ``get_memory_refcount``, and ``cleanup_stale_memory_subscribers``, and
    turns subscriber counts into forgetting-protection signals.
    """

    def __init__(self, db):
        # Duck-typed DB handle; only the three methods above are used.
        self.db = db

    def record_retrieval_refs(
        self,
        memory_ids: Iterable[str],
        agent_id: str,
        strong: bool = False,
        ttl_hours: int = 24 * 14,
    ) -> None:
        """Register one subscriber ref per retrieved memory.

        Strong refs never expire (ttl is forced to None); weak refs carry
        `ttl_hours`.  Falsy memory ids are skipped.
        """
        kind = "strong" if strong else "weak"
        owner = f"agent:{agent_id}"
        expiry = None if strong else ttl_hours
        for raw_id in memory_ids:
            if not raw_id:
                continue
            self.db.add_memory_subscriber(str(raw_id), owner, ref_type=kind, ttl_hours=expiry)

    def get_refcount(self, memory_id: str) -> Dict[str, Any]:
        """Return the raw strong/weak subscriber counts for one memory."""
        return self.db.get_memory_refcount(memory_id)

    def should_protect_from_forgetting(self, memory_id: str) -> bool:
        """A memory with at least one strong ref is protected from forgetting."""
        return int(self.get_refcount(memory_id).get("strong", 0)) > 0

    def weak_dampening_factor(self, memory_id: str) -> float:
        """Decay slow-down multiplier: each weak ref (capped at 10) adds 0.15."""
        weak_refs = int(self.get_refcount(memory_id).get("weak", 0))
        # More weak refs => slower forgetting.
        return 1.0 + min(weak_refs, 10) * 0.15

    def cleanup_stale_refs(self) -> int:
        """Purge expired subscriber rows; returns the number removed."""
        return int(self.db.cleanup_stale_memory_subscribers())
+_TRACKED_HOOK = ( + Path(__file__).resolve().parents[2] + / "plugins" + / "engram-memory" + / "hooks" + / "prompt_context.py" +) +if _TRACKED_HOOK.exists(): + _PROMPT_CONTEXT_PY = _TRACKED_HOOK.read_text(encoding="utf-8") + _CMD_ENGRAM_MD = """\ --- name: engram diff --git a/engram/integrations/openclaw.py b/engram/integrations/openclaw.py index f8b88ab..89e92e5 100644 --- a/engram/integrations/openclaw.py +++ b/engram/integrations/openclaw.py @@ -37,16 +37,32 @@ | Action | Method | Endpoint | Key fields | |--------|--------|----------|------------| -| Search | POST | /v1/search | `query`, `limit` | -| Store | POST | /v1/memories | `content`, `source_app`, `infer` | +| Session | POST | /v1/sessions | `user_id`, `agent_id`, `allowed_confidentiality_scopes` | +| Search | POST | /v1/search | `query`, `user_id`, `agent_id`, `limit` | +| Store | POST | /v1/memories | `content`, `mode`, `scope`, `source_app` | | Health | GET | /health | — | --- ## Rules -### 1 – Search at the start of every user message -Before you begin composing a reply, silently POST to +### 1 – Create session token, then search each user message +Before the first memory call in a session, create a capability token via +`{engram_api_url}/v1/sessions` and include it in all subsequent requests: +`Authorization: Bearer `. + +Session request body: +```json +{{ + "user_id": "default", + "agent_id": "openclaw", + "allowed_confidentiality_scopes": ["work", "personal", "finance", "health", "private"], + "capabilities": ["search", "propose_write"], + "ttl_minutes": 1440 +}} +``` + +Then before composing a reply, silently POST to `{engram_api_url}/v1/search` with a `query` derived from the user's message. Inject any returned snippets into your reasoning context. Do **not** surface the raw search payload to the user. 
def cmd_status(args):
    """Show version, config paths, installed integrations, and DB stats.

    Fix: the ``status`` subcommand registers ``--json``/``-j`` but the
    original handler ignored it; when the flag is set a machine-readable
    JSON document is printed instead of the human-readable report.
    """
    import json

    import engram

    home = Path.home()
    # NOTE(review): the Claude Desktop path is macOS-specific — confirm
    # whether Windows/Linux config locations should also be probed.
    configs = {
        "Claude Code": home / ".claude.json",
        "Claude Desktop": home / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json",
        "Cursor": home / ".cursor" / "mcp.json",
        "Codex": home / ".codex" / "config.toml",
        "Plugin": home / ".engram" / "claude-plugin" / "engram-memory",
    }
    data_dir = home / ".engram"

    # Locate the database: prefer the data directory, fall back to CWD.
    db_path = data_dir / "engram.db"
    if not db_path.exists():
        fallback = Path("engram.db")
        db_path = fallback if fallback.exists() else None

    # Memory stats are best-effort: opening the store may fail (no DB yet).
    stats = None
    try:
        from engram import Engram
        stats = Engram(in_memory=False).stats()
    except Exception:
        stats = None

    if getattr(args, "json", False):
        payload = {
            "version": engram.__version__,
            "integrations": {name: path.exists() for name, path in configs.items()},
            "data_dir": str(data_dir),
            "database": (
                {
                    "path": str(db_path),
                    "size_mb": round(db_path.stat().st_size / (1024 * 1024), 1),
                }
                if db_path
                else None
            ),
            "memories": (
                {
                    "total": stats.get("total", 0),
                    "sml_count": stats.get("sml_count", 0),
                    "lml_count": stats.get("lml_count", 0),
                }
                if stats
                else None
            ),
        }
        print(json.dumps(payload, indent=2))
        return

    print(f"Engram v{engram.__version__}")
    print("=" * 40)

    print("\nIntegrations:")
    for name, path in configs.items():
        status = "installed" if path.exists() else "not found"
        print(f"  {name}: {status}")

    print(f"\nData directory: {data_dir}")

    if db_path:
        size_mb = db_path.stat().st_size / (1024 * 1024)
        print(f"Database: {db_path} ({size_mb:.1f} MB)")
    else:
        print("Database: not found")

    if stats is not None:
        print(f"\nMemories: {stats.get('total', 0)} total")
        print(f"  Short-term (SML): {stats.get('sml_count', 0)}")
        print(f"  Long-term (LML): {stats.get('lml_count', 0)}")
name="add_memory", - description="Add a new memory to engram with full control over categories, scope, and metadata. For simple saves without those extras, prefer the `remember` tool instead. Use `add_memory` when you need categories, scope, agent_id, or other advanced options.", + description="Create a write proposal in staging by default. Supports direct writes only when mode='direct' in trusted local contexts. For simple saves without extras, prefer `remember`.", inputSchema={ "type": "object", "properties": { @@ -204,7 +204,20 @@ async def list_tools() -> List[Tool]: }, "scope": { "type": "string", - "description": "Memory scope: agent|connector|category|global" + "description": "Confidentiality scope: work|personal|finance|health|private" + }, + "namespace": { + "type": "string", + "description": "Namespace for scoped memory segmentation (default: 'default')." + }, + "mode": { + "type": "string", + "enum": ["staging", "direct"], + "description": "Write mode. Default staging." + }, + "source_event_id": { + "type": "string", + "description": "Optional source event ID for provenance." } }, "required": ["content"] @@ -380,7 +393,7 @@ async def list_tools() -> List[Tool]: ), Tool( name="remember", - description="Quick-save a fact or preference. Stores your text directly with source_app='claude-code' and infer=False (no LLM extraction). For more control over categories, scope, or metadata, use add_memory instead.", + description="Quick-save to staging. Creates a proposal commit with source_app='claude-code' and infer=False by default.", inputSchema={ "type": "object", "properties": { @@ -397,6 +410,172 @@ async def list_tools() -> List[Tool]: "required": ["content"] } ), + Tool( + name="propose_write", + description="Create a staging proposal commit for a memory write. 
Preferred v2 write path for agents.", + inputSchema={ + "type": "object", + "properties": { + "content": {"type": "string"}, + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "categories": {"type": "array", "items": {"type": "string"}}, + "metadata": {"type": "object"}, + "scope": { + "type": "string", + "description": "Confidentiality scope: work|personal|finance|health|private" + }, + "namespace": { + "type": "string", + "description": "Namespace for scoped memory segmentation (default: 'default')." + }, + "mode": { + "type": "string", + "enum": ["staging", "direct"], + "description": "Direct mode reserved for trusted local clients." + }, + }, + "required": ["content"], + }, + ), + Tool( + name="list_pending_commits", + description="List staging commits and their statuses.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "status": {"type": "string"}, + "limit": {"type": "integer"}, + }, + }, + ), + Tool( + name="resolve_conflict", + description="Resolve a conflict stash entry.", + inputSchema={ + "type": "object", + "properties": { + "stash_id": {"type": "string"}, + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "resolution": { + "type": "string", + "enum": ["UNRESOLVED", "KEEP_EXISTING", "ACCEPT_PROPOSED", "KEEP_BOTH"], + }, + }, + "required": ["stash_id", "resolution"], + }, + ), + Tool( + name="declare_namespace", + description="Declare a namespace for scoped memory access.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "namespace": {"type": "string"}, + "description": {"type": "string"}, + }, + "required": ["namespace"], + }, + ), + Tool( + name="grant_namespace_permission", + description="Grant an agent capability on a namespace.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "namespace": {"type": "string"}, + "agent_id": 
{"type": "string"}, + "requester_agent_id": {"type": "string"}, + "capability": {"type": "string"}, + "expires_at": {"type": "string"}, + }, + "required": ["namespace", "agent_id"], + }, + ), + Tool( + name="upsert_agent_policy", + description="Create or update an agent policy used to clamp capability sessions.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "requester_agent_id": {"type": "string"}, + "allowed_confidentiality_scopes": { + "type": "array", + "items": {"type": "string"}, + }, + "allowed_capabilities": { + "type": "array", + "items": {"type": "string"}, + }, + "allowed_namespaces": { + "type": "array", + "items": {"type": "string"}, + }, + }, + "required": ["agent_id"], + }, + ), + Tool( + name="list_agent_policies", + description="List policies for a user, or fetch one policy when agent_id is provided.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "requester_agent_id": {"type": "string"}, + "include_wildcard": {"type": "boolean"}, + }, + }, + ), + Tool( + name="delete_agent_policy", + description="Delete one policy for a user and agent.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "requester_agent_id": {"type": "string"}, + }, + "required": ["agent_id"], + }, + ), + Tool( + name="get_agent_trust", + description="Get trust score stats for an agent.", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "requester_agent_id": {"type": "string"}, + }, + "required": ["agent_id"], + }, + ), + Tool( + name="run_sleep_cycle", + description="Run the maintenance sleep cycle once (digest + promotion + decay + ref GC).", + inputSchema={ + "type": "object", + "properties": { + "user_id": {"type": "string"}, + "agent_id": {"type": "string"}, + "date": {"type": 
"string"}, + "apply_decay": {"type": "boolean"}, + "cleanup_stale_refs": {"type": "boolean"}, + }, + }, + ), # ---- Episodic Scene tools ---- Tool( name="get_scene", @@ -407,7 +586,15 @@ async def list_tools() -> List[Tool]: "scene_id": { "type": "string", "description": "The ID of the scene to retrieve" - } + }, + "user_id": { + "type": "string", + "description": "User identifier (default: 'default')", + }, + "agent_id": { + "type": "string", + "description": "Agent identifier for scoped scene reads (optional)", + }, }, "required": ["scene_id"] } @@ -455,6 +642,10 @@ async def list_tools() -> List[Tool]: "type": "string", "description": "User identifier (default: 'default')" }, + "agent_id": { + "type": "string", + "description": "Agent identifier for scoped scene search (optional)", + }, "limit": { "type": "integer", "description": "Maximum number of results (default: 10)" @@ -523,26 +714,54 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: memory = get_memory() result: Any = None - if name == "add_memory": + def _session_token( + *, + user_id: str, + agent_id: Optional[str], + capabilities: List[str], + namespaces: Optional[List[str]] = None, + ) -> str: + session = memory.create_session( + user_id=user_id, + agent_id=agent_id, + allowed_confidentiality_scopes=["work", "personal", "finance", "health", "private"], + capabilities=capabilities, + namespaces=namespaces, + ttl_minutes=24 * 60, + ) + return session["token"] + + if name in {"add_memory", "propose_write"}: content = arguments.get("content", "") user_id = arguments.get("user_id", "default") agent_id = arguments.get("agent_id") categories = arguments.get("categories") metadata = arguments.get("metadata") - agent_category = arguments.get("agent_category") - connector_id = arguments.get("connector_id") - scope = arguments.get("scope") + scope = arguments.get("scope", "work") + namespace = arguments.get("namespace", "default") + mode = arguments.get("mode", "staging") + 
source_event_id = arguments.get("source_event_id") + token = _session_token( + user_id=user_id, + agent_id=agent_id, + capabilities=["propose_write"], + namespaces=[namespace], + ) - result = memory.add( - messages=content, + result = memory.propose_write( + content=content, user_id=user_id, agent_id=agent_id, categories=categories, metadata=metadata, - agent_category=agent_category, - connector_id=connector_id, scope=scope, - infer=False, # Store the content directly without LLM extraction + namespace=namespace, + mode=mode, + infer=False, + token=token, + source_app="mcp", + source_type="mcp", + source_event_id=source_event_id, ) elif name == "search_memory": @@ -554,23 +773,37 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: agent_category = arguments.get("agent_category") connector_ids = arguments.get("connector_ids") scope_filter = arguments.get("scope_filter") - - result = memory.search( - query=query, - user_id=user_id, - agent_id=agent_id, - limit=limit, - categories=categories, - agent_category=agent_category, - connector_ids=connector_ids, - scope_filter=scope_filter, - ) + if agent_id: + token = _session_token( + user_id=user_id, + agent_id=agent_id, + capabilities=["search"], + ) + result = memory.search_with_context( + query=query, + user_id=user_id, + agent_id=agent_id, + token=token, + limit=limit, + categories=categories, + ) + else: + result = memory.search( + query=query, + user_id=user_id, + agent_id=agent_id, + limit=limit, + categories=categories, + agent_category=agent_category, + connector_ids=connector_ids, + scope_filter=scope_filter, + ) # Simplify results for readability if "results" in result: result["results"] = [ { - "id": r["id"], - "memory": r["memory"], + "id": r.get("id"), + "memory": r.get("memory", r.get("details", "")), "score": round(r.get("composite_score", r.get("score", 0)), 3), "layer": r.get("layer", "sml"), "categories": r.get("categories", []), @@ -668,20 +901,199 @@ async def 
call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: elif name == "remember": content = arguments.get("content", "") categories = arguments.get("categories") - result = memory.add( - messages=content, + token = _session_token( + user_id="default", + agent_id="claude-code", + capabilities=["propose_write"], + namespaces=[arguments.get("namespace", "default")], + ) + result = memory.propose_write( + content=content, user_id="default", + agent_id="claude-code", categories=categories, + scope=arguments.get("scope", "work"), + namespace=arguments.get("namespace", "default"), + mode=arguments.get("mode", "staging"), source_app="claude-code", + source_type="mcp", infer=False, + token=token, + ) + + elif name == "list_pending_commits": + user_id = arguments.get("user_id", "default") + agent_id = arguments.get("agent_id", "claude-code") + token = _session_token( + user_id=user_id, + agent_id=agent_id, + capabilities=["review_commits"], + ) + result = memory.list_pending_commits( + user_id=user_id, + agent_id=agent_id, + token=token, + status=arguments.get("status"), + limit=arguments.get("limit", 100), + ) + + elif name == "resolve_conflict": + agent_id = arguments.get("agent_id", "claude-code") + # Conflict ownership is resolved from stash; session user can stay default. 
+ token = _session_token( + user_id=arguments.get("user_id", "default"), + agent_id=agent_id, + capabilities=["resolve_conflicts"], + ) + result = memory.resolve_conflict( + stash_id=arguments.get("stash_id", ""), + resolution=arguments.get("resolution", "UNRESOLVED"), + token=token, + agent_id=agent_id, + ) + + elif name == "declare_namespace": + user_id = arguments.get("user_id", "default") + caller_agent_id = arguments.get("agent_id", "claude-code") + token = _session_token( + user_id=user_id, + agent_id=caller_agent_id, + capabilities=["manage_namespaces"], + namespaces=[arguments.get("namespace", "default")], + ) + result = memory.declare_namespace( + user_id=user_id, + namespace=arguments.get("namespace", "default"), + description=arguments.get("description"), + token=token, + agent_id=caller_agent_id, + ) + + elif name == "grant_namespace_permission": + user_id = arguments.get("user_id", "default") + requester_agent_id = arguments.get("requester_agent_id", arguments.get("agent_id", "claude-code")) + token = _session_token( + user_id=user_id, + agent_id=requester_agent_id, + capabilities=["manage_namespaces"], + namespaces=[arguments.get("namespace", "default")], + ) + result = memory.grant_namespace_permission( + user_id=user_id, + namespace=arguments.get("namespace", "default"), + agent_id=arguments.get("agent_id", "claude-code"), + capability=arguments.get("capability", "read"), + expires_at=arguments.get("expires_at"), + token=token, + requester_agent_id=requester_agent_id, + ) + + elif name == "upsert_agent_policy": + user_id = arguments.get("user_id", "default") + requester_agent_id = arguments.get("requester_agent_id", arguments.get("agent_id", "claude-code")) + token = _session_token( + user_id=user_id, + agent_id=requester_agent_id, + capabilities=["manage_namespaces"], + ) + result = memory.upsert_agent_policy( + user_id=user_id, + agent_id=arguments.get("agent_id", "claude-code"), + 
allowed_confidentiality_scopes=arguments.get("allowed_confidentiality_scopes"), + allowed_capabilities=arguments.get("allowed_capabilities"), + allowed_namespaces=arguments.get("allowed_namespaces"), + token=token, + requester_agent_id=requester_agent_id, + ) + + elif name == "list_agent_policies": + user_id = arguments.get("user_id", "default") + requester_agent_id = arguments.get("requester_agent_id", arguments.get("agent_id", "claude-code")) + token = _session_token( + user_id=user_id, + agent_id=requester_agent_id, + capabilities=["manage_namespaces"], + ) + lookup_agent_id = arguments.get("agent_id") + if lookup_agent_id: + result = memory.get_agent_policy( + user_id=user_id, + agent_id=lookup_agent_id, + include_wildcard=arguments.get("include_wildcard", True), + token=token, + requester_agent_id=requester_agent_id, + ) + else: + result = memory.list_agent_policies( + user_id=user_id, + token=token, + requester_agent_id=requester_agent_id, + ) + + elif name == "delete_agent_policy": + user_id = arguments.get("user_id", "default") + requester_agent_id = arguments.get("requester_agent_id", arguments.get("agent_id", "claude-code")) + token = _session_token( + user_id=user_id, + agent_id=requester_agent_id, + capabilities=["manage_namespaces"], + ) + result = memory.delete_agent_policy( + user_id=user_id, + agent_id=arguments.get("agent_id", "claude-code"), + token=token, + requester_agent_id=requester_agent_id, + ) + + elif name == "get_agent_trust": + user_id = arguments.get("user_id", "default") + requester_agent_id = arguments.get("requester_agent_id", arguments.get("agent_id", "claude-code")) + token = _session_token( + user_id=user_id, + agent_id=requester_agent_id, + capabilities=["read_trust"], + ) + result = memory.get_agent_trust( + user_id=user_id, + agent_id=arguments.get("agent_id", "claude-code"), + token=token, + requester_agent_id=requester_agent_id, + ) + + elif name == "run_sleep_cycle": + user_id = arguments.get("user_id", "default") + 
caller_agent_id = arguments.get("agent_id", "claude-code") + token = _session_token( + user_id=user_id, + agent_id=caller_agent_id, + capabilities=["run_sleep_cycle"], + ) + result = memory.run_sleep_cycle( + user_id=arguments.get("user_id"), + date_str=arguments.get("date"), + apply_decay=arguments.get("apply_decay", True), + cleanup_stale_refs=arguments.get("cleanup_stale_refs", True), + token=token, + agent_id=caller_agent_id, ) # ---- Scene tools ---- elif name == "get_scene": scene_id = arguments.get("scene_id", "") - scene = memory.get_scene(scene_id) + user_id = arguments.get("user_id", "default") + agent_id = arguments.get("agent_id", "claude-code") + token = _session_token( + user_id=user_id, + agent_id=agent_id, + capabilities=["read_scene"], + ) + scene = memory.kernel.get_scene( + scene_id=scene_id, + user_id=user_id, + agent_id=agent_id, + token=token, + ) if scene: - scene.pop("embedding", None) result = scene else: result = {"error": "Scene not found"} @@ -714,18 +1126,32 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: elif name == "search_scenes": query = arguments.get("query", "") user_id = arguments.get("user_id", "default") + agent_id = arguments.get("agent_id", "claude-code") limit = arguments.get("limit", 10) - scenes = memory.search_scenes(query=query, user_id=user_id, limit=limit) + token = _session_token( + user_id=user_id, + agent_id=agent_id, + capabilities=["read_scene"], + ) + payload = memory.kernel.search_scenes( + query=query, + user_id=user_id, + agent_id=agent_id, + token=token, + limit=limit, + ) + scenes = payload.get("scenes", []) result = { "scenes": [ { - "id": s["id"], + "id": s.get("id"), "title": s.get("title"), - "summary": s.get("summary"), + "summary": s.get("summary", s.get("details")), "topic": s.get("topic"), - "start_time": s.get("start_time"), + "start_time": s.get("start_time", s.get("time")), "search_score": s.get("search_score"), "memory_count": len(s.get("memory_ids", [])), + 
"masked": bool(s.get("masked", False)), } for s in scenes ], diff --git a/engram/memory/client.py b/engram/memory/client.py index eec86e3..b93dd87 100644 --- a/engram/memory/client.py +++ b/engram/memory/client.py @@ -1,5 +1,8 @@ +import os from typing import Any, Dict, List, Optional +from engram.core.policy import DEFAULT_CAPABILITIES + class MemoryClient: """Thin HTTP client for remote engram server.""" @@ -10,6 +13,7 @@ def __init__( host: str = "https://api.engram.ai", org_id: str = None, project_id: str = None, + admin_key: str = None, ): try: import requests # noqa: F401 @@ -20,10 +24,14 @@ def __init__( self.host = host.rstrip("/") self.org_id = org_id self.project_id = project_id + self.admin_key = admin_key + self.session_token: Optional[str] = None def _headers(self) -> Dict[str, str]: headers = {"Content-Type": "application/json"} - if self.api_key: + if self.session_token: + headers["Authorization"] = f"Bearer {self.session_token}" + elif self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" if self.org_id: headers["X-Org-Id"] = self.org_id @@ -31,11 +39,22 @@ def _headers(self) -> Dict[str, str]: headers["X-Project-Id"] = self.project_id return headers - def _request(self, method: str, path: str, *, params: Dict[str, Any] = None, json_body: Dict[str, Any] = None): + def _request( + self, + method: str, + path: str, + *, + params: Dict[str, Any] = None, + json_body: Dict[str, Any] = None, + extra_headers: Dict[str, Any] = None, + ): import requests url = f"{self.host}{path}" - response = requests.request(method, url, headers=self._headers(), params=params, json=json_body, timeout=60) + headers = self._headers() + if extra_headers: + headers.update({str(k): str(v) for k, v in extra_headers.items() if v is not None}) + response = requests.request(method, url, headers=headers, params=params, json=json_body, timeout=60) response.raise_for_status() return response.json() @@ -44,11 +63,49 @@ def add(self, messages, **kwargs) -> Dict[str, Any]: 
payload.update(kwargs) return self._request("POST", "/v1/memories/", json_body=payload) + def create_session( + self, + *, + user_id: str, + agent_id: Optional[str] = None, + allowed_confidentiality_scopes: Optional[List[str]] = None, + capabilities: Optional[List[str]] = None, + namespaces: Optional[List[str]] = None, + ttl_minutes: int = 24 * 60, + ) -> Dict[str, Any]: + payload = { + "user_id": user_id, + "agent_id": agent_id, + "allowed_confidentiality_scopes": allowed_confidentiality_scopes or ["work"], + "capabilities": capabilities or list(DEFAULT_CAPABILITIES), + "namespaces": namespaces or ["default"], + "ttl_minutes": ttl_minutes, + } + key = self.admin_key if self.admin_key is not None else os.environ.get("ENGRAM_ADMIN_KEY") + headers = {"X-Engram-Admin-Key": key} if key else None + session = self._request("POST", "/v1/sessions", json_body=payload, extra_headers=headers) + token = session.get("token") + if token: + self.session_token = token + return session + + def propose_write(self, content: str, **kwargs) -> Dict[str, Any]: + payload = {"content": content} + payload.update(kwargs) + payload.setdefault("mode", "staging") + payload.setdefault("namespace", "default") + return self._request("POST", "/v1/memories/", json_body=payload) + def search(self, query: str, **kwargs) -> Dict[str, Any]: payload = {"query": query} payload.update(kwargs) return self._request("POST", "/v1/memories/search/", json_body=payload) + def search_scenes(self, query: str, **kwargs) -> Dict[str, Any]: + payload = {"query": query} + payload.update(kwargs) + return self._request("POST", "/v1/scenes/search", json_body=payload) + def get(self, memory_id: str, **kwargs) -> Dict[str, Any]: return self._request("GET", f"/v1/memories/{memory_id}/", params=kwargs) @@ -72,3 +129,141 @@ def delete_all(self, **kwargs) -> Dict[str, Any]: def history(self, memory_id: str, **kwargs) -> List[Dict[str, Any]]: return self._request("GET", f"/v1/memories/{memory_id}/history/", params=kwargs) + + 
def list_pending_commits(self, **kwargs) -> Dict[str, Any]: + return self._request("GET", "/v1/staging/commits", params=kwargs) + + def approve_commit(self, commit_id: str) -> Dict[str, Any]: + return self._request("POST", f"/v1/staging/commits/{commit_id}/approve", json_body={}) + + def reject_commit(self, commit_id: str, reason: Optional[str] = None) -> Dict[str, Any]: + return self._request("POST", f"/v1/staging/commits/{commit_id}/reject", json_body={"reason": reason}) + + def resolve_conflict(self, stash_id: str, resolution: str) -> Dict[str, Any]: + return self._request( + "POST", + f"/v1/conflicts/{stash_id}/resolve", + json_body={"resolution": resolution}, + ) + + def daily_digest(self, *, user_id: str, date: Optional[str] = None) -> Dict[str, Any]: + params: Dict[str, Any] = {"user_id": user_id} + if date: + params["date"] = date + return self._request("GET", "/v1/digest/daily", params=params) + + def run_sleep_cycle( + self, + *, + user_id: Optional[str] = None, + date: Optional[str] = None, + apply_decay: bool = True, + cleanup_stale_refs: bool = True, + ) -> Dict[str, Any]: + payload: Dict[str, Any] = { + "user_id": user_id, + "date": date, + "apply_decay": apply_decay, + "cleanup_stale_refs": cleanup_stale_refs, + } + return self._request("POST", "/v1/sleep/run", json_body=payload) + + def get_agent_trust(self, *, user_id: str, agent_id: str) -> Dict[str, Any]: + return self._request("GET", "/v1/trust", params={"user_id": user_id, "agent_id": agent_id}) + + def list_namespaces(self, *, user_id: Optional[str] = None) -> Dict[str, Any]: + params: Dict[str, Any] = {} + if user_id: + params["user_id"] = user_id + return self._request("GET", "/v1/namespaces", params=params) + + def declare_namespace( + self, + *, + user_id: str, + namespace: str, + description: Optional[str] = None, + ) -> Dict[str, Any]: + return self._request( + "POST", + "/v1/namespaces", + json_body={"user_id": user_id, "namespace": namespace, "description": description}, + ) + + def 
grant_namespace_permission( + self, + *, + user_id: str, + namespace: str, + agent_id: str, + capability: str = "read", + expires_at: Optional[str] = None, + ) -> Dict[str, Any]: + return self._request( + "POST", + "/v1/namespaces/permissions", + json_body={ + "user_id": user_id, + "namespace": namespace, + "agent_id": agent_id, + "capability": capability, + "expires_at": expires_at, + }, + ) + + def upsert_agent_policy( + self, + *, + user_id: str, + agent_id: str, + allowed_confidentiality_scopes: Optional[List[str]] = None, + allowed_capabilities: Optional[List[str]] = None, + allowed_namespaces: Optional[List[str]] = None, + ) -> Dict[str, Any]: + payload: Dict[str, Any] = { + "user_id": user_id, + "agent_id": agent_id, + } + if allowed_confidentiality_scopes is not None: + payload["allowed_confidentiality_scopes"] = allowed_confidentiality_scopes + if allowed_capabilities is not None: + payload["allowed_capabilities"] = allowed_capabilities + if allowed_namespaces is not None: + payload["allowed_namespaces"] = allowed_namespaces + return self._request("POST", "/v1/agent-policies", json_body=payload) + + def list_agent_policies( + self, + *, + user_id: str, + ) -> Dict[str, Any]: + return self._request("GET", "/v1/agent-policies", params={"user_id": user_id}) + + def get_agent_policy( + self, + *, + user_id: str, + agent_id: str, + include_wildcard: bool = True, + ) -> Dict[str, Any]: + return self._request( + "GET", + "/v1/agent-policies", + params={ + "user_id": user_id, + "agent_id": agent_id, + "include_wildcard": str(bool(include_wildcard)).lower(), + }, + ) + + def delete_agent_policy( + self, + *, + user_id: str, + agent_id: str, + ) -> Dict[str, Any]: + return self._request( + "DELETE", + "/v1/agent-policies", + params={"user_id": user_id, "agent_id": agent_id}, + ) diff --git a/engram/memory/episodic_store.py b/engram/memory/episodic_store.py new file mode 100644 index 0000000..e9c88e7 --- /dev/null +++ b/engram/memory/episodic_store.py @@ -0,0 +1,290 
@@ +"""CAST-inspired episodic storage and retrieval.""" + +from __future__ import annotations + +import re +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Tuple + + +_NAME_RE = re.compile(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b") + + +def _cosine_similarity(a: Optional[List[float]], b: Optional[List[float]]) -> float: + if not a or not b or len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b)) + norm_a = sum(x * x for x in a) ** 0.5 + norm_b = sum(x * x for x in b) ** 0.5 + if norm_a == 0 or norm_b == 0: + return 0.0 + return dot / (norm_a * norm_b) + + +class EpisodicStore: + def __init__(self, db, embedder, *, time_window_minutes: int = 30, topic_threshold: float = 0.7): + self.db = db + self.embedder = embedder + self.time_window_minutes = time_window_minutes + self.topic_threshold = topic_threshold + + def ingest_memory_as_view( + self, + *, + user_id: str, + agent_id: Optional[str], + memory_id: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + timestamp: Optional[str] = None, + ) -> Dict[str, Any]: + metadata = metadata or {} + timestamp = timestamp or datetime.utcnow().isoformat() + namespace = str(metadata.get("namespace", "default") or "default").strip() or "default" + place_type, place_value = self._extract_place(metadata) + topic_label = self._extract_topic(content) + topic_embedding = self.embedder.embed(topic_label, memory_action="search") if topic_label else None + characters = self._extract_characters(content=content, metadata=metadata, agent_id=agent_id) + + target_scene = self._find_scene_for_view( + user_id=user_id, + view_time=timestamp, + place_value=place_value, + topic_embedding=topic_embedding, + namespace=namespace, + ) + + if target_scene: + scene_id = target_scene["id"] + self._attach_to_scene( + scene=target_scene, + memory_id=memory_id, + view_time=timestamp, + place_value=place_value, + topic_label=topic_label, + topic_embedding=topic_embedding, + 
characters=characters, + namespace=namespace, + ) + else: + scene_id = self.db.add_scene( + { + "user_id": user_id, + "title": topic_label, + "summary": topic_label, + "topic": topic_label, + "location": place_value, + "participants": [c["entity_id"] for c in characters], + "memory_ids": [memory_id], + "start_time": timestamp, + "end_time": timestamp, + "embedding": topic_embedding, + "layer": "sml", + "scene_strength": 1.0, + "namespace": namespace, + } + ) + self.db.add_scene_memory(scene_id, memory_id, position=0) + self.db.update_memory(memory_id, {"scene_id": scene_id}) + + view_id = self.db.add_view( + { + "user_id": user_id, + "agent_id": agent_id, + "timestamp": timestamp, + "place_type": place_type, + "place_value": place_value, + "topic_label": topic_label, + "topic_embedding_ref": memory_id, + "characters": characters, + "raw_text": content, + "signals": { + "importance": metadata.get("importance", 0.5), + "sentiment": metadata.get("sentiment", "neutral"), + }, + "scene_id": scene_id, + } + ) + return { + "view_id": view_id, + "scene_id": scene_id, + } + + def search_scenes( + self, + *, + user_id: str, + query: str, + limit: int = 10, + entities: Optional[List[str]] = None, + place_hint: Optional[str] = None, + ) -> List[Dict[str, Any]]: + scenes = self.db.get_scenes(user_id=user_id, limit=max(limit * 5, 20)) + if not scenes: + return [] + + query_embedding = self.embedder.embed(query, memory_action="search") + query_terms = set(query.lower().split()) + entities_lower = {e.lower() for e in (entities or [])} + + scored: List[Tuple[float, Dict[str, Any]]] = [] + for scene in scenes: + score = 0.0 + scene_emb = scene.get("embedding") + score += _cosine_similarity(query_embedding, scene_emb) + + text = f"{scene.get('summary', '')} {scene.get('topic', '')} {scene.get('title', '')}".lower() + keyword_hits = sum(1 for t in query_terms if t in text) + score += keyword_hits * 0.05 + + if place_hint and scene.get("location"): + if place_hint.lower() in 
str(scene.get("location", "")).lower(): + score += 0.1 + else: + continue + + participants = {str(p).lower() for p in scene.get("participants", [])} + if entities_lower and not (participants & entities_lower): + continue + + scene["search_score"] = round(score, 4) + scored.append((score, scene)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [scene for _, scene in scored[:limit]] + + def _extract_place(self, metadata: Dict[str, Any]) -> Tuple[str, Optional[str]]: + place = metadata.get("place") or metadata.get("location") + if isinstance(place, dict): + return str(place.get("type") or "digital"), place.get("value") + if isinstance(place, str): + return "digital", place + repo = metadata.get("repo") or metadata.get("workspace") + if repo: + return "digital", str(repo) + return "digital", None + + def _extract_topic(self, content: str) -> str: + terms = (content or "").strip().split() + return " ".join(terms[:10]) if terms else "untitled" + + def _extract_characters(self, *, content: str, metadata: Dict[str, Any], agent_id: Optional[str]) -> List[Dict[str, str]]: + chars: List[Dict[str, str]] = [] + primary = metadata.get("actor_id") or metadata.get("speaker") or agent_id or "char_self" + chars.append({"entity_id": str(primary), "role": "MC"}) + + for match in _NAME_RE.findall(content or ""): + name = match.strip() + if name.lower() in {"i", "we", "the", "this", "that"}: + continue + if name == primary: + continue + chars.append({"entity_id": name, "role": "SC"}) + + seen = set() + unique = [] + for c in chars: + key = (c["entity_id"], c["role"]) + if key in seen: + continue + seen.add(key) + unique.append(c) + return unique + + def _find_scene_for_view( + self, + *, + user_id: str, + view_time: str, + place_value: Optional[str], + topic_embedding: Optional[List[float]], + namespace: str, + ) -> Optional[Dict[str, Any]]: + candidates = self.db.get_scenes(user_id=user_id, limit=25) + if not candidates: + return None + + view_dt = 
self._safe_parse_time(view_time) + best_score = -1.0 + best_scene = None + + for scene in candidates: + scene_namespace = str(scene.get("namespace", "default") or "default").strip() or "default" + if scene_namespace != namespace: + continue + cond_count = 0 + score = 0.0 + + scene_time = scene.get("end_time") or scene.get("start_time") + scene_dt = self._safe_parse_time(scene_time) + if view_dt and scene_dt and abs((view_dt - scene_dt).total_seconds()) <= self.time_window_minutes * 60: + cond_count += 1 + score += 0.4 + + scene_place = scene.get("location") + if place_value and scene_place and str(place_value).lower() == str(scene_place).lower(): + cond_count += 1 + score += 0.3 + + sim = _cosine_similarity(topic_embedding, scene.get("embedding")) + if sim >= self.topic_threshold: + cond_count += 1 + score += min(0.3, sim * 0.3) + + if cond_count >= 2 and score > best_score: + best_score = score + best_scene = scene + + return best_scene + + def _attach_to_scene( + self, + *, + scene: Dict[str, Any], + memory_id: str, + view_time: str, + place_value: Optional[str], + topic_label: str, + topic_embedding: Optional[List[float]], + characters: List[Dict[str, str]], + namespace: str, + ) -> None: + scene_id = scene["id"] + memory_ids = list(scene.get("memory_ids", [])) + if memory_id not in memory_ids: + position = len(memory_ids) + memory_ids.append(memory_id) + self.db.add_scene_memory(scene_id, memory_id, position=position) + participants = set(scene.get("participants", [])) + participants.update(c["entity_id"] for c in characters) + + updates: Dict[str, Any] = { + "memory_ids": memory_ids, + "participants": sorted(participants), + "end_time": view_time, + "location": place_value or scene.get("location"), + "summary": scene.get("summary") or topic_label, + "topic": scene.get("topic") or topic_label, + "namespace": namespace, + } + if topic_embedding and scene.get("embedding"): + old = scene.get("embedding") + n = max(len(memory_ids) - 1, 1) + updates["embedding"] = [ 
+ (old[i] * n + topic_embedding[i]) / (n + 1) + for i in range(len(topic_embedding)) + ] + elif topic_embedding: + updates["embedding"] = topic_embedding + + self.db.update_scene(scene_id, updates) + self.db.update_memory(memory_id, {"scene_id": scene_id}) + + @staticmethod + def _safe_parse_time(value: Optional[str]) -> Optional[datetime]: + if not value: + return None + try: + return datetime.fromisoformat(value) + except Exception: + return None diff --git a/engram/memory/main.py b/engram/memory/main.py index a571045..ddf7f47 100644 --- a/engram/memory/main.py +++ b/engram/memory/main.py @@ -2,6 +2,7 @@ import json import logging +import os import uuid from datetime import datetime, date from enum import Enum @@ -23,6 +24,8 @@ from engram.core.graph import KnowledgeGraph from engram.core.scene import SceneProcessor from engram.core.profile import ProfileProcessor +from engram.core.kernel import PersonalMemoryKernel +from engram.core.policy import feature_enabled from engram.db.sqlite import SQLiteManager from engram.exceptions import FadeMemValidationError from engram.memory.base import MemoryBase @@ -34,6 +37,7 @@ parse_messages, strip_code_fences, ) +from engram.observability import metrics from engram.utils.factory import EmbedderFactory, LLMFactory, VectorStoreFactory from engram.utils.prompts import AGENT_MEMORY_EXTRACTION_PROMPT, MEMORY_EXTRACTION_PROMPT @@ -167,6 +171,9 @@ def __init__(self, config: Optional[MemoryConfig] = None): else: self.profile_processor = None + # v2 Personal Memory Kernel orchestration layer. 
+ self.kernel = PersonalMemoryKernel(self) + @classmethod def from_config(cls, config_dict: Dict[str, Any]): return cls(MemoryConfig(**config_dict)) @@ -265,7 +272,8 @@ def add( content = explicit_intent.content blocked = detect_sensitive_categories(content) - if blocked: + allow_sensitive = bool(mem_metadata.get("allow_sensitive")) + if blocked and not allow_sensitive: results.append( { "event": "BLOCKED", @@ -436,6 +444,20 @@ def add( if low_confidence: layer = "sml" + confidentiality_scope = str( + mem_metadata.get("confidentiality_scope") + or mem_metadata.get("privacy_scope") + or "work" + ).lower() + source_type = ( + mem_metadata.get("source_type") + or ("cli" if (source_app or "").lower() == "cli" else "mcp") + ) + source_event_id = mem_metadata.get("source_event_id") + importance = mem_metadata.get("importance", 0.5) + sensitivity = mem_metadata.get("sensitivity", "normal") + namespace_value = str(mem_metadata.get("namespace", "default") or "default").strip() or "default" + memory_id = str(uuid.uuid4()) now = datetime.utcnow().isoformat() memory_data = { @@ -456,6 +478,15 @@ def add( "access_count": 0, "last_accessed": now, "embedding": embedding, + "confidentiality_scope": confidentiality_scope, + "source_type": source_type, + "source_app": source_app or mem_metadata.get("source_app"), + "source_event_id": source_event_id, + "decay_lambda": self.fadem_config.sml_decay_rate, + "status": "active", + "importance": importance, + "sensitivity": sensitivity, + "namespace": namespace_value, } vectors, payloads, vector_ids = self._build_index_vectors( @@ -515,6 +546,7 @@ def add( "strength": effective_strength, "echo_depth": echo_result.echo_depth.value if echo_result else None, "categories": mem_categories, + "namespace": namespace_value, "vector_nodes": len(vectors) # Info for user } ) @@ -723,6 +755,8 @@ def search( and metadata.get("echo_depth") != "deep" ): self._reecho_memory(memory["id"]) + if agent_id: + self.db.add_memory_subscriber(memory["id"], 
f"agent:{agent_id}", ref_type="weak") results.append( { @@ -747,6 +781,14 @@ def search( "last_accessed": memory.get("last_accessed"), "composite_score": combined, "scope": scope, + "namespace": memory.get("namespace", "default"), + "confidentiality_scope": memory.get("confidentiality_scope", "work"), + "source_type": memory.get("source_type"), + "source_app": memory.get("source_app"), + "source_event_id": memory.get("source_event_id"), + "status": memory.get("status", "active"), + "importance": memory.get("importance", 0.5), + "sensitivity": memory.get("sensitivity", "normal"), "echo_boost": echo_boost, "category_boost": category_boost, "graph_boost": graph_boost, @@ -986,6 +1028,13 @@ def apply_decay(self, scope: Dict[str, Any] = None) -> Dict[str, Any]: if not self.fadem_config.enable_forgetting: return {"decayed": 0, "forgotten": 0, "promoted": 0} + stale_refs_removed = 0 + if feature_enabled("ENGRAM_V2_REF_GC", default=True): + try: + stale_refs_removed = int(self.kernel.ref_manager.cleanup_stale_refs()) + except Exception: + stale_refs_removed = 0 + memories = self.db.get_all_memories( user_id=scope.get("user_id") if scope else None, agent_id=scope.get("agent_id") if scope else None, @@ -1001,6 +1050,15 @@ def apply_decay(self, scope: Dict[str, Any] = None) -> Dict[str, Any]: if memory.get("immutable"): continue + ref_aware = feature_enabled("ENGRAM_V2_REF_AWARE_DECAY", default=True) + ref_state = {"strong": 0, "weak": 0} + if ref_aware: + ref_state = self.db.get_memory_refcount(memory["id"]) + if int(ref_state.get("strong", 0)) > 0: + # Strong references pause decay/deletion. 
+ metrics.record_ref_protected_skip(1) + continue + new_strength = calculate_decayed_strength( current_strength=memory.get("strength", 1.0), last_accessed=memory.get("last_accessed", datetime.utcnow().isoformat()), @@ -1009,7 +1067,18 @@ def apply_decay(self, scope: Dict[str, Any] = None) -> Dict[str, Any]: config=self.fadem_config, ) - if should_forget(new_strength, self.fadem_config): + if ref_aware and int(ref_state.get("weak", 0)) > 0: + weak = min(int(ref_state.get("weak", 0)), 10) + dampening = 1.0 + weak * 0.15 + retained_floor = memory.get("strength", 1.0) * (1.0 - 0.03 / dampening) + new_strength = max(new_strength, retained_floor) + + forget_threshold = self.fadem_config.forgetting_threshold + if ref_aware and int(ref_state.get("weak", 0)) > 0: + weak = min(int(ref_state.get("weak", 0)), 10) + forget_threshold = forget_threshold / (1.0 + weak * 0.25) + + if new_strength < forget_threshold: self.delete(memory["id"]) forgotten += 1 continue @@ -1033,7 +1102,12 @@ def apply_decay(self, scope: Dict[str, Any] = None) -> Dict[str, Any]: self.db.purge_tombstoned() self.db.log_decay(decayed, forgotten, promoted) - return {"decayed": decayed, "forgotten": forgotten, "promoted": promoted} + return { + "decayed": decayed, + "forgotten": forgotten, + "promoted": promoted, + "stale_refs_removed": stale_refs_removed, + } def fuse_memories(self, memory_ids: List[str], user_id: str = None) -> Dict[str, Any]: memories = [self.db.get_memory(mid) for mid in memory_ids] @@ -1091,6 +1165,280 @@ def promote(self, memory_id: str) -> Dict[str, Any]: def demote(self, memory_id: str) -> Dict[str, Any]: return {"success": self.db.update_memory(memory_id, {"layer": "sml"})} + # v2 kernel facade methods + def create_session( + self, + *, + user_id: str, + agent_id: Optional[str] = None, + allowed_confidentiality_scopes: Optional[List[str]] = None, + capabilities: Optional[List[str]] = None, + namespaces: Optional[List[str]] = None, + ttl_minutes: int = 24 * 60, + ) -> Dict[str, Any]: 
+ return self.kernel.create_session( + user_id=user_id, + agent_id=agent_id, + allowed_confidentiality_scopes=allowed_confidentiality_scopes, + capabilities=capabilities, + namespaces=namespaces, + ttl_minutes=ttl_minutes, + ) + + def search_with_context( + self, + *, + query: str, + user_id: str, + agent_id: Optional[str] = None, + token: Optional[str] = None, + limit: int = 10, + categories: Optional[List[str]] = None, + ) -> Dict[str, Any]: + return self.kernel.search( + query=query, + user_id=user_id, + agent_id=agent_id, + token=token, + limit=limit, + categories=categories, + ) + + def propose_write( + self, + *, + content: str, + user_id: str, + agent_id: Optional[str] = None, + token: Optional[str] = None, + categories: Optional[List[str]] = None, + metadata: Optional[Dict[str, Any]] = None, + scope: str = "work", + namespace: Optional[str] = None, + mode: str = "staging", + infer: bool = False, + source_app: Optional[str] = None, + source_type: str = "mcp", + source_event_id: Optional[str] = None, + trusted_direct: bool = False, + ) -> Dict[str, Any]: + return self.kernel.propose_write( + content=content, + user_id=user_id, + agent_id=agent_id, + token=token, + categories=categories, + metadata=metadata, + scope=scope, + namespace=namespace, + mode=mode, + infer=infer, + source_app=source_app, + source_type=source_type, + source_event_id=source_event_id, + trusted_direct=trusted_direct, + ) + + def list_pending_commits( + self, + user_id: Optional[str] = None, + status: Optional[str] = None, + limit: int = 100, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.list_pending_commits( + user_id=user_id, + status=status, + limit=limit, + token=token, + agent_id=agent_id, + ) + + def approve_commit( + self, + commit_id: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.approve_commit(commit_id=commit_id, token=token, agent_id=agent_id) + + 
def reject_commit( + self, + commit_id: str, + reason: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.reject_commit(commit_id=commit_id, reason=reason, token=token, agent_id=agent_id) + + def resolve_conflict( + self, + stash_id: str, + resolution: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.resolve_conflict(stash_id=stash_id, resolution=resolution, token=token, agent_id=agent_id) + + def get_daily_digest( + self, + user_id: str, + date_str: str, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.get_daily_digest( + user_id=user_id, + date_str=date_str, + token=token, + agent_id=agent_id, + ) + + def run_sleep_cycle( + self, + user_id: Optional[str] = None, + date_str: Optional[str] = None, + apply_decay: bool = True, + cleanup_stale_refs: bool = True, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.run_sleep_cycle( + user_id=user_id, + date_str=date_str, + apply_decay=apply_decay, + cleanup_stale_refs=cleanup_stale_refs, + token=token, + agent_id=agent_id, + ) + + def get_agent_trust( + self, + user_id: str, + agent_id: str, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.get_agent_trust( + user_id=user_id, + agent_id=agent_id, + token=token, + requester_agent_id=requester_agent_id, + ) + + def list_namespaces( + self, + user_id: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + return self.kernel.list_namespaces(user_id=user_id, token=token, agent_id=agent_id) + + def declare_namespace( + self, + *, + user_id: str, + namespace: str, + description: Optional[str] = None, + token: Optional[str] = None, + agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + 
return self.kernel.declare_namespace( + user_id=user_id, + namespace=namespace, + description=description, + token=token, + agent_id=agent_id, + ) + + def grant_namespace_permission( + self, + *, + user_id: str, + namespace: str, + agent_id: str, + capability: str = "read", + expires_at: Optional[str] = None, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.grant_namespace_permission( + user_id=user_id, + namespace=namespace, + agent_id=agent_id, + capability=capability, + expires_at=expires_at, + token=token, + requester_agent_id=requester_agent_id, + ) + + def upsert_agent_policy( + self, + *, + user_id: str, + agent_id: str, + allowed_confidentiality_scopes: Optional[List[str]] = None, + allowed_capabilities: Optional[List[str]] = None, + allowed_namespaces: Optional[List[str]] = None, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.upsert_agent_policy( + user_id=user_id, + agent_id=agent_id, + allowed_confidentiality_scopes=allowed_confidentiality_scopes, + allowed_capabilities=allowed_capabilities, + allowed_namespaces=allowed_namespaces, + token=token, + requester_agent_id=requester_agent_id, + ) + + def get_agent_policy( + self, + *, + user_id: str, + agent_id: str, + include_wildcard: bool = True, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Optional[Dict[str, Any]]: + return self.kernel.get_agent_policy( + user_id=user_id, + agent_id=agent_id, + include_wildcard=include_wildcard, + token=token, + requester_agent_id=requester_agent_id, + ) + + def list_agent_policies( + self, + *, + user_id: Optional[str] = None, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + return self.kernel.list_agent_policies( + user_id=user_id, + token=token, + requester_agent_id=requester_agent_id, + ) + + def delete_agent_policy( + self, + *, + 
user_id: str, + agent_id: str, + token: Optional[str] = None, + requester_agent_id: Optional[str] = None, + ) -> Dict[str, Any]: + return self.kernel.delete_agent_policy( + user_id=user_id, + agent_id=agent_id, + token=token, + requester_agent_id=requester_agent_id, + ) + # Internal helpers def _extract_memories( self, @@ -1837,12 +2185,25 @@ def _assign_to_scene( self.scene_processor.auto_close_stale(user_id) current_scene = self.db.get_open_scene(user_id) - detection = self.scene_processor.detect_boundary( - content=content, - timestamp=timestamp, - current_scene=current_scene, - embedding=embedding, - ) + memory_row = self.db.get_memory(memory_id) or {} + namespace = str(memory_row.get("namespace", "default") or "default").strip() or "default" + if ( + current_scene + and str(current_scene.get("namespace", "default") or "default").strip() != namespace + ): + detection = self.scene_processor.detect_boundary( + content=content, + timestamp=timestamp, + current_scene=None, + embedding=embedding, + ) + else: + detection = self.scene_processor.detect_boundary( + content=content, + timestamp=timestamp, + current_scene=current_scene, + embedding=embedding, + ) if detection.is_new_scene: # Close old scene if open @@ -1860,6 +2221,7 @@ def _assign_to_scene( topic=topic, location=location, embedding=embedding, + namespace=namespace, ) else: if current_scene: @@ -1868,6 +2230,7 @@ def _assign_to_scene( memory_id=memory_id, embedding=embedding, timestamp=timestamp, + namespace=namespace, ) def _update_profiles( @@ -1904,6 +2267,7 @@ def get_scenes( topic: Optional[str] = None, start_after: Optional[str] = None, start_before: Optional[str] = None, + namespace: Optional[str] = None, limit: int = 50, ) -> List[Dict[str, Any]]: """List scenes chronologically.""" @@ -1912,6 +2276,7 @@ def get_scenes( topic=topic, start_after=start_after, start_before=start_before, + namespace=namespace, limit=limit, ) @@ -1960,3 +2325,15 @@ def update_profile(self, profile_id: str, updates: 
class StagingStore:
    """Persistence facade for proposal commits and conflict stashes.

    Every method is a thin delegation to the injected ``db`` backend's
    ``*_proposal_*`` / ``*_conflict_*`` API; this class only shapes the
    payloads and fills in ids/timestamps.
    """

    def __init__(self, db):
        # db: storage backend exposing the proposal/conflict persistence calls.
        self.db = db

    def create_commit(
        self,
        *,
        user_id: str,
        agent_id: Optional[str],
        scope: str,
        changes: List[Dict[str, Any]],
        checks: Dict[str, Any],
        preview: Dict[str, Any],
        provenance: Dict[str, Any],
        status: str = "PENDING",
    ) -> Dict[str, Any]:
        """Persist a new proposal commit and return it with its changes attached."""
        now = datetime.utcnow().isoformat()
        record = dict(
            id=str(uuid.uuid4()),
            user_id=user_id,
            agent_id=agent_id,
            scope=scope,
            status=status,
            checks=checks,
            preview=preview,
            provenance=provenance,
            created_at=now,
            updated_at=now,
        )
        self.db.add_proposal_commit(record, changes=changes)
        return {**record, "changes": changes}

    def list_commits(self, user_id: Optional[str], status: Optional[str], limit: int = 100) -> List[Dict[str, Any]]:
        """Return recent commits, each hydrated with its change list."""
        rows = self.db.list_proposal_commits(user_id=user_id, status=status, limit=limit)
        for row in rows:
            row["changes"] = self.db.get_proposal_changes(row["id"])
        return rows

    def get_commit(self, commit_id: str) -> Optional[Dict[str, Any]]:
        """Fetch one commit by id, or None when it does not exist."""
        return self.db.get_proposal_commit(commit_id)

    def mark_approved(self, commit_id: str) -> None:
        """Set a commit's status to APPROVED."""
        self.db.update_proposal_commit(commit_id, {"status": "APPROVED"})

    def mark_rejected(self, commit_id: str, reason: Optional[str] = None) -> None:
        """Set a commit's status to REJECTED, recording *reason* in its checks when given."""
        updates: Dict[str, Any] = {"status": "REJECTED"}
        if reason:
            existing = self.get_commit(commit_id) or {}
            merged_checks = dict(existing.get("checks", {}))
            merged_checks["rejection_reason"] = reason
            updates["checks"] = merged_checks
        self.db.update_proposal_commit(commit_id, updates)

    def mark_auto_stashed(self, commit_id: str) -> None:
        """Set a commit's status to AUTO_STASHED."""
        self.db.update_proposal_commit(commit_id, {"status": "AUTO_STASHED"})

    def add_conflict(
        self,
        *,
        user_id: str,
        conflict_key: str,
        existing: Dict[str, Any],
        proposed: Dict[str, Any],
        source_commit_id: Optional[str],
    ) -> str:
        """Stash an unresolved write conflict; returns the new stash id."""
        stash = {
            "user_id": user_id,
            "conflict_key": conflict_key,
            "existing": existing,
            "proposed": proposed,
            "source_commit_id": source_commit_id,
            "resolution": "UNRESOLVED",
        }
        return self.db.add_conflict_stash(stash)

    def resolve_conflict(self, stash_id: str, resolution: str) -> bool:
        """Record a resolution for a stashed conflict."""
        return self.db.resolve_conflict_stash(stash_id, resolution)

    def list_conflicts(
        self,
        *,
        user_id: Optional[str],
        resolution: Optional[str] = None,
        limit: int = 100,
    ) -> List[Dict[str, Any]]:
        """List stashed conflicts, optionally filtered by resolution state."""
        return self.db.list_conflict_stash(user_id=user_id, resolution=resolution, limit=limit)
total_masked_hits: int = 0 + total_staged_commits: int = 0 + total_auto_stashed: int = 0 + total_commit_approved: int = 0 + total_commit_rejected: int = 0 + total_ref_protected_skips: int = 0 def to_dict(self) -> Dict[str, Any]: return { @@ -158,6 +164,12 @@ def to_dict(self) -> Dict[str, Any]: "total_decayed": self.total_decayed, "total_forgotten": self.total_forgotten, "total_promoted": self.total_promoted, + "total_masked_hits": self.total_masked_hits, + "total_staged_commits": self.total_staged_commits, + "total_auto_stashed": self.total_auto_stashed, + "total_commit_approved": self.total_commit_approved, + "total_commit_rejected": self.total_commit_rejected, + "total_ref_protected_skips": self.total_ref_protected_skips, "avg_search_results": round( self.search_results_total / self.total_searched, 2 ) if self.total_searched > 0 else 0, @@ -221,6 +233,31 @@ def record_delete(self, latency_ms: float, **tags): """Record memory delete operation.""" self.record_operation("delete", latency_ms, **tags) + def record_masked_hits(self, count: int = 1): + with self._lock: + self._memory.total_masked_hits += max(0, int(count)) + + def record_staged_commit(self, status: str): + status_upper = (status or "").upper() + with self._lock: + self._memory.total_staged_commits += 1 + if status_upper == "AUTO_STASHED": + self._memory.total_auto_stashed += 1 + + def record_commit_approval(self, latency_ms: float): + self.record_operation("commit_approve", latency_ms) + with self._lock: + self._memory.total_commit_approved += 1 + + def record_commit_rejection(self): + self.record_operation("commit_reject", 0) + with self._lock: + self._memory.total_commit_rejected += 1 + + def record_ref_protected_skip(self, count: int = 1): + with self._lock: + self._memory.total_ref_protected_skips += max(0, int(count)) + def set_gauge(self, name: str, value: float): """Set a custom gauge metric.""" with self._lock: @@ -258,6 +295,12 @@ def get_prometheus_metrics(self) -> str: 
lines.append(f'engram_memories_decayed_total {mem["total_decayed"]}') lines.append(f'engram_memories_forgotten_total {mem["total_forgotten"]}') lines.append(f'engram_memories_promoted_total {mem["total_promoted"]}') + lines.append(f'engram_memories_masked_hits_total {mem["total_masked_hits"]}') + lines.append(f'engram_staged_commits_total {mem["total_staged_commits"]}') + lines.append(f'engram_staged_auto_stashed_total {mem["total_auto_stashed"]}') + lines.append(f'engram_commit_approved_total {mem["total_commit_approved"]}') + lines.append(f'engram_commit_rejected_total {mem["total_commit_rejected"]}') + lines.append(f'engram_ref_protected_skips_total {mem["total_ref_protected_skips"]}') # Custom gauges for name, value in summary["gauges"].items(): diff --git a/engram/retrieval/__init__.py b/engram/retrieval/__init__.py new file mode 100644 index 0000000..1942d25 --- /dev/null +++ b/engram/retrieval/__init__.py @@ -0,0 +1,5 @@ +"""Engram v2 retrieval components.""" + +from engram.retrieval.dual_search import DualSearchEngine + +__all__ = ["DualSearchEngine"] diff --git a/engram/retrieval/context_packer.py b/engram/retrieval/context_packer.py new file mode 100644 index 0000000..4a6b665 --- /dev/null +++ b/engram/retrieval/context_packer.py @@ -0,0 +1,62 @@ +"""Context packet builder for token-bounded retrieval output.""" + +from __future__ import annotations + +from typing import Dict, List, Set + + +def _estimate_tokens(text: str) -> int: + if not text: + return 0 + # Conservative heuristic for GPT-style tokenization. 
+ return max(1, len(text) // 4) + + +def pack_context( + *, + query: str, + results: List[Dict], + episodic_scenes: List[Dict], + max_tokens: int = 800, + max_items: int = 8, +) -> Dict: + scene_ids: Set[str] = {str(scene.get("id")) for scene in episodic_scenes if scene.get("id")} + snippets: List[Dict] = [] + token_used = _estimate_tokens(query) + masked_count = 0 + + for item in results[: max_items * 3]: + is_masked = bool(item.get("masked")) + if is_masked: + masked_count += 1 + text = item.get("memory") or item.get("details") or "" + candidate_tokens = _estimate_tokens(text) + if token_used + candidate_tokens > max_tokens and snippets: + break + + snippet = { + "memory_id": item.get("id"), + "text": text, + "masked": is_masked, + "score": item.get("composite_score", item.get("score")), + "citations": { + "scene_ids": list(scene_ids), + }, + } + snippets.append(snippet) + token_used += candidate_tokens + if len(snippets) >= max_items: + break + + return { + "query": query, + "snippets": snippets, + "token_usage": { + "estimated_tokens": token_used, + "budget": max_tokens, + }, + "masking": { + "masked_count": masked_count, + "total_candidates": len(results), + }, + } diff --git a/engram/retrieval/dual_search.py b/engram/retrieval/dual_search.py new file mode 100644 index 0000000..01ae38f --- /dev/null +++ b/engram/retrieval/dual_search.py @@ -0,0 +1,136 @@ +"""Dual retrieval engine: semantic + episodic with intersection promotion.""" + +from __future__ import annotations + +from typing import Any, Dict, Iterable, List, Optional, Set + +from engram.core.policy import enforce_scope_on_results +from engram.observability import metrics +from engram.retrieval.context_packer import pack_context +from engram.retrieval.reranker import intersection_promote + + +class DualSearchEngine: + def __init__(self, *, memory, episodic_store, ref_manager): + self.memory = memory + self.episodic_store = episodic_store + self.ref_manager = ref_manager + + def search( + self, + *, + 
query: str, + user_id: str, + agent_id: Optional[str], + limit: int = 10, + categories: Optional[List[str]] = None, + allowed_confidentiality_scopes: Optional[Iterable[str]] = None, + allowed_namespaces: Optional[Iterable[str]] = None, + ) -> Dict[str, Any]: + semantic_payload = self.memory.search( + query=query, + user_id=user_id, + agent_id=agent_id, + limit=max(limit * 2, 10), + categories=categories, + ) + semantic_results = semantic_payload.get("results", semantic_payload) + + episodic_scenes = self.episodic_store.search_scenes( + user_id=user_id, + query=query, + limit=max(limit, 5), + ) + visible_scenes = self._filter_scenes_by_namespace(episodic_scenes, allowed_namespaces) + + promoted = intersection_promote(semantic_results, visible_scenes) + for item in promoted: + if "confidentiality_scope" not in item: + row = self.memory.db.get_memory(item.get("id")) + if row: + item["confidentiality_scope"] = row.get("confidentiality_scope", "work") + item["importance"] = row.get("importance", 0.5) + + masked = enforce_scope_on_results(promoted, allowed_confidentiality_scopes) + namespaced = self._enforce_namespace_on_results(masked, allowed_namespaces) + final_results = namespaced[:limit] + masked_count = sum(1 for item in final_results if item.get("masked")) + if masked_count: + metrics.record_masked_hits(masked_count) + + context_packet = pack_context( + query=query, + results=final_results, + episodic_scenes=visible_scenes, + max_tokens=800, + max_items=min(8, limit), + ) + + if agent_id: + visible_ids = [r.get("id") for r in final_results if r.get("id") and not r.get("masked")] + self.ref_manager.record_retrieval_refs(visible_ids, agent_id=agent_id, strong=False) + + return { + "results": final_results, + "count": len(final_results), + "context_packet": context_packet, + "scene_hits": [ + { + "scene_id": s.get("id"), + "summary": s.get("summary"), + "memory_ids": s.get("memory_ids", []), + "search_score": s.get("search_score"), + } + for s in 
visible_scenes[:limit] + ], + } + + @staticmethod + def _namespace_mask(item: Dict[str, Any]) -> Dict[str, Any]: + return { + "id": item.get("id"), + "type": "private_event", + "time": item.get("created_at") or item.get("timestamp"), + "importance": item.get("importance", 0.5), + "details": "[REDACTED]", + "masked": True, + } + + def _enforce_namespace_on_results( + self, + results: List[Dict[str, Any]], + allowed_namespaces: Optional[Iterable[str]], + ) -> List[Dict[str, Any]]: + allowed: Set[str] = {str(ns).strip() for ns in (allowed_namespaces or []) if str(ns).strip()} + if not allowed or "*" in allowed: + normalized: List[Dict[str, Any]] = [] + for item in results: + value = dict(item) + value["masked"] = bool(value.get("masked", False)) + normalized.append(value) + return normalized + + filtered: List[Dict[str, Any]] = [] + for item in results: + namespace = str(item.get("namespace") or "default").strip() or "default" + if namespace in allowed: + value = dict(item) + value["masked"] = bool(value.get("masked", False)) + filtered.append(value) + else: + filtered.append(self._namespace_mask(item)) + return filtered + + def _filter_scenes_by_namespace( + self, + scenes: List[Dict[str, Any]], + allowed_namespaces: Optional[Iterable[str]], + ) -> List[Dict[str, Any]]: + allowed: Set[str] = {str(ns).strip() for ns in (allowed_namespaces or []) if str(ns).strip()} + if not allowed or "*" in allowed: + return scenes + return [ + scene + for scene in scenes + if str(scene.get("namespace") or "default").strip() in allowed + ] diff --git a/engram/retrieval/reranker.py b/engram/retrieval/reranker.py new file mode 100644 index 0000000..b20023d --- /dev/null +++ b/engram/retrieval/reranker.py @@ -0,0 +1,37 @@ +"""Re-ranking helpers for dual retrieval.""" + +from __future__ import annotations + +from typing import Dict, List, Set + + +def intersection_promote( + semantic_results: List[Dict], + episodic_scene_results: List[Dict], +) -> List[Dict]: + """Promote semantic 
results that also appear in episodic scenes. + + Relative order among promoted items follows original semantic ranking. + """ + episodic_memory_ids: Set[str] = set() + for scene in episodic_scene_results: + for mid in scene.get("memory_ids", []) or []: + episodic_memory_ids.add(str(mid)) + + if not episodic_memory_ids: + return semantic_results + + promoted: List[Dict] = [] + others: List[Dict] = [] + for item in semantic_results: + mid = str(item.get("id")) + if mid in episodic_memory_ids: + enriched = dict(item) + enriched["episodic_match"] = True + promoted.append(enriched) + else: + enriched = dict(item) + enriched["episodic_match"] = False + others.append(enriched) + + return promoted + others diff --git a/plugins/engram-memory/hooks/prompt_context.py b/plugins/engram-memory/hooks/prompt_context.py index a5d1333..3bdd6b1 100755 --- a/plugins/engram-memory/hooks/prompt_context.py +++ b/plugins/engram-memory/hooks/prompt_context.py @@ -1,59 +1,46 @@ #!/usr/bin/env python3 """Engram UserPromptSubmit hook — stdlib-only proactive memory injector. -Reads the user prompt from STDIN (or falls back to the USER_PROMPT env var), -queries the running Engram API for relevant memories, and prints a JSON -object with a ``systemMessage`` key that Claude Code will inject into context. - -Design constraints ------------------- -* stdlib only — runs as a bare subprocess, no pip install -* Phase 1: GET /health with 3 s timeout – fast-fail if API is down -* Phase 2: POST /v1/search with 6 s timeout -* Query derivation is pure string ops (no LLM call) -* Always exits 0; any failure prints ``{}`` +Reads the user prompt, ensures a capability token, queries Engram search, +and emits a systemMessage for Claude Code context injection. 
""" import json import os import sys +from datetime import datetime, timedelta +from pathlib import Path try: from urllib.request import Request, urlopen - from urllib.error import URLError -except ImportError: # pragma: no cover – safety net +except ImportError: # pragma: no cover sys.stdout.write("{}") sys.exit(0) -# --------------------------------------------------------------------------- -# Configuration (all env-overridable) -# --------------------------------------------------------------------------- API_BASE = os.environ.get("ENGRAM_API_URL", "http://127.0.0.1:8100") -HEALTH_TIMEOUT = 3 # seconds -SEARCH_TIMEOUT = 6 # seconds +HEALTH_TIMEOUT = 3 +SEARCH_TIMEOUT = 6 +SESSION_TIMEOUT = 4 MAX_QUERY_CHARS = 120 SENTINEL = "[Engram \u2014 relevant memories from previous sessions]" +USER_ID = os.environ.get("ENGRAM_USER_ID", "default") +AGENT_ID = os.environ.get("ENGRAM_AGENT_ID", "claude-code") +TOKEN_CACHE = Path(os.environ.get("ENGRAM_TOKEN_CACHE", str(Path.home() / ".engram" / "session_token.json"))) +ADMIN_KEY = os.environ.get("ENGRAM_ADMIN_KEY", "").strip() -def _derive_query(raw: str) -> str: - """Extract a short query from the raw user prompt (no LLM). - Takes the first sentence (split on . ! ?) or the first MAX_QUERY_CHARS - characters, whichever is shorter. - """ +def _derive_query(raw: str) -> str: raw = raw.strip() - # Find the end of the first sentence for i, ch in enumerate(raw): if ch in ".!?" 
and i > 0: candidate = raw[: i + 1].strip() if candidate: return candidate[:MAX_QUERY_CHARS] - # No sentence-ending punctuation found — just truncate return raw[:MAX_QUERY_CHARS] def _health_check() -> bool: - """GET /health — returns True if the API is reachable and healthy.""" try: req = Request(f"{API_BASE}/health") resp = urlopen(req, timeout=HEALTH_TIMEOUT) @@ -62,13 +49,102 @@ def _health_check() -> bool: return False -def _search(query: str) -> list: - """POST /v1/search — returns the raw results list (may be empty).""" - payload = json.dumps({"query": query, "limit": 5}).encode("utf-8") +def _parse_time(value: str): + try: + return datetime.fromisoformat(value) + except Exception: + return None + + +def _read_cached_token() -> str: + if not TOKEN_CACHE.exists(): + return "" + try: + data = json.loads(TOKEN_CACHE.read_text(encoding="utf-8")) + token = data.get("token", "") + expires_at = data.get("expires_at", "") + if not token or not expires_at: + return "" + exp = _parse_time(expires_at) + if exp is None: + return "" + if datetime.utcnow() + timedelta(minutes=2) >= exp: + return "" + return token + except Exception: + return "" + + +def _write_cached_token(token: str, expires_at: str) -> None: + try: + TOKEN_CACHE.parent.mkdir(parents=True, exist_ok=True) + TOKEN_CACHE.write_text( + json.dumps({"token": token, "expires_at": expires_at}), + encoding="utf-8", + ) + except Exception: + pass + + +def _create_session_token() -> str: + payload = json.dumps( + { + "user_id": USER_ID, + "agent_id": AGENT_ID, + "allowed_confidentiality_scopes": ["work", "personal", "finance", "health", "private"], + "capabilities": ["search", "propose_write"], + "ttl_minutes": 24 * 60, + } + ).encode("utf-8") + headers = {"Content-Type": "application/json"} + if ADMIN_KEY: + headers["X-Engram-Admin-Key"] = ADMIN_KEY + req = Request( + f"{API_BASE}/v1/sessions", + data=payload, + headers=headers, + method="POST", + ) + resp = urlopen(req, timeout=SESSION_TIMEOUT) + body = 
json.loads(resp.read().decode("utf-8")) + token = body.get("token", "") + expires_at = body.get("expires_at", "") + if token and expires_at: + _write_cached_token(token, expires_at) + return token + + +def _get_token() -> str: + env_token = os.environ.get("ENGRAM_API_TOKEN", "").strip() + if env_token: + return env_token + + cached = _read_cached_token() + if cached: + return cached + + try: + return _create_session_token() + except Exception: + return "" + + +def _search(query: str, token: str) -> list: + payload = json.dumps( + { + "query": query, + "limit": 5, + "user_id": USER_ID, + "agent_id": AGENT_ID, + } + ).encode("utf-8") + headers = {"Content-Type": "application/json"} + if token: + headers["Authorization"] = f"Bearer {token}" req = Request( f"{API_BASE}/v1/search", data=payload, - headers={"Content-Type": "application/json"}, + headers=headers, method="POST", ) resp = urlopen(req, timeout=SEARCH_TIMEOUT) @@ -77,20 +153,16 @@ def _search(query: str) -> list: def _format_memories(results: list) -> str: - """Turn search results into the injected system-message block.""" lines = [SENTINEL] for idx, mem in enumerate(results, 1): layer = mem.get("layer", "sml") score = mem.get("composite_score", mem.get("score", 0.0)) - content = mem.get("memory", mem.get("content", "")).strip() + content = mem.get("memory", mem.get("content", mem.get("details", ""))).strip() lines.append(f"{idx}. [{layer}, relevance {score:.2f}] {content}") return "\n".join(lines) def main() -> None: - """Entry point — orchestrates health-check → search → output.""" - # Read the user prompt. Claude Code may pass it via USER_PROMPT env var - # or via STDIN depending on hook invocation mode. 
raw_prompt = os.environ.get("USER_PROMPT", "") if not raw_prompt: try: @@ -102,20 +174,22 @@ def main() -> None: sys.stdout.write("{}") return - # Phase 1 – health check (fast-fail) if not _health_check(): sys.stdout.write("{}") return - # Phase 2 – search + token = _get_token() + if not token: + sys.stdout.write("{}") + return + query = _derive_query(raw_prompt) - results = _search(query) + results = _search(query, token) if not results: sys.stdout.write("{}") return - # Emit the hook response output = {"systemMessage": _format_memories(results)} sys.stdout.write(json.dumps(output)) @@ -124,5 +198,4 @@ def main() -> None: try: main() except Exception: - # Outermost safety net — never crash, never block the user sys.stdout.write("{}") diff --git a/pyproject.toml b/pyproject.toml index a9d69f4..1b4fcc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,17 +4,17 @@ build-backend = "setuptools.build_meta" [project] name = "engram" -version = "0.1.0" -description = "Engram: Biologically-inspired memory layer for AI agents with forgetting, echo encoding, and dynamic categories" +version = "0.4.0" +description = "Memory layer for AI agents — biologically-inspired forgetting, multi-agent trust, and plug-and-play integrations" readme = "README.md" requires-python = ">=3.9" license = {text = "MIT"} authors = [ {name = "Engram Team"} ] -keywords = ["memory", "ai", "agents", "forgetting", "llm", "echo", "categories"] +keywords = ["memory-layer", "mcp", "claude", "cursor", "codex", "ai", "agents", "forgetting", "llm"] classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", @@ -72,6 +72,8 @@ engram-api = "engram.api.server:run" Homepage = "https://github.com/Ashish-dwi99/Engram" Repository = "https://github.com/Ashish-dwi99/Engram" Issues = "https://github.com/Ashish-dwi99/Engram/issues" +Documentation = 
"https://github.com/Ashish-dwi99/Engram#readme" +Changelog = "https://github.com/Ashish-dwi99/Engram/blob/main/CHANGELOG.md" [tool.setuptools.packages.find] where = ["."]