Back to Posts

AI Agent Memory and Context Management

By Lumina Software
Tags: ai, agentic-ai, architecture, patterns

AI Agent Memory and Context Management

AI agents need memory to be useful. Without it, every interaction starts from scratch. With proper memory and context management, agents can learn, remember, and build on previous interactions. Here's how to implement effective memory systems for AI agents.

Types of Memory

1. Short-Term Memory (Conversation Context)

Current conversation window:

/**
 * Short-term memory: the messages of the current conversation, trimmed so
 * their total size stays within a token budget.
 *
 * A system prompt, when present, is expected to be the first message; it is
 * pinned and never trimmed.
 *
 * NOTE(review): `getTotalTokens()` is not defined in this snippet and must be
 * supplied alongside it (e.g. by summing a tokenizer count over `messages`).
 */
class ConversationMemory {
  private messages: Message[] = [];
  private maxTokens = 4000;

  /** Appends a message, then evicts the oldest messages if over budget. */
  addMessage(message: Message): void {
    this.messages.push(message);
    this.trimIfNeeded();
  }

  /**
   * Evicts the oldest non-pinned messages until the budget is met or only
   * one message is left.
   */
  private trimIfNeeded(): void {
    // Pin index 0 only when it really is a system message. Pinning it
    // unconditionally would keep the oldest user turn forever while newer
    // turns are evicted around it.
    const firstRemovable = this.messages[0]?.role === 'system' ? 1 : 0;

    while (this.messages.length > 1 && this.getTotalTokens() > this.maxTokens) {
      this.messages.splice(firstRemovable, 1);
    }
  }

  /**
   * Returns the current context window, oldest message first.
   *
   * A copy is returned so callers cannot mutate the internal buffer and
   * bypass trimming.
   */
  getContext(): Message[] {
    return [...this.messages];
  }
}

2. Long-Term Memory (Persistent Storage)

Information that persists across sessions:

/**
 * Long-term memory: per-user values kept in a key-value store so they
 * outlive a single session.
 *
 * Every entry is stored under the composite key `<userId>:<key>`; a user's
 * facts live together as one array under the `facts` key.
 *
 * NOTE(review): `storage` is never assigned and `searchFacts` is not defined
 * in this snippet; both must be supplied by the surrounding code.
 */
class LongTermMemory {
  private storage: Storage;

  /** Writes `value` under the user-scoped key. */
  async save(userId: string, key: string, value: any): Promise<void> {
    const scopedKey = `${userId}:${key}`;
    await this.storage.set(scopedKey, value);
  }

  /** Reads whatever the store holds under the user-scoped key. */
  async load(userId: string, key: string): Promise<any> {
    const scopedKey = `${userId}:${key}`;
    const stored = await this.storage.get(scopedKey);
    return stored;
  }

  /** Appends one fact to the user's fact list and writes the list back. */
  async remember(userId: string, fact: Fact): Promise<void> {
    const stored = await this.load(userId, 'facts');
    // A falsy stored value means no facts have been saved yet.
    const facts = stored || [];
    facts.push(fact);
    await this.save(userId, 'facts', facts);
  }

  /** Loads the user's facts and returns what `searchFacts` selects for `query`. */
  async recall(userId: string, query: string): Promise<Fact[]> {
    const stored = await this.load(userId, 'facts');
    return this.searchFacts(stored || [], query);
  }
}

3. Episodic Memory (Event History)

Remember specific events:

/**
 * Episodic memory: an append-only, in-process log of events.
 *
 * NOTE(review): `matchesQuery` is not defined in this snippet and must be
 * supplied alongside it.
 */
class EpisodicMemory {
  private events: Event[] = [];

  /**
   * Appends an event stamped with the current time. Any `timestamp` already
   * on the incoming event is overwritten.
   */
  recordEvent(event: Event): void {
    this.events.push({
      ...event,
      timestamp: Date.now(),
    });
  }

  /** Returns every recorded event that `matchesQuery` accepts for `query`. */
  recallEvents(query: EventQuery): Event[] {
    return this.events.filter((event) => this.matchesQuery(event, query));
  }

  /**
   * Returns up to `limit` of the most recently recorded events, newest first.
   *
   * @param limit Maximum number of events; fractions are rounded down, and
   *   anything below 1 (or NaN) yields an empty array.
   */
  getRecentEvents(limit = 10): Event[] {
    const count = Math.floor(limit);
    // Guard explicitly: slice(-0) is slice(0), which would return the whole
    // log for a limit of 0, and a negative limit would slice from the front.
    if (!(count >= 1)) {
      return [];
    }

    return this.events
      .slice(-count)
      // Reverse before the (stable) sort so events recorded within the same
      // millisecond still come out newest-first.
      .reverse()
      .sort((a, b) => b.timestamp - a.timestamp);
  }
}

4. Semantic Memory (Knowledge Base)

Structured knowledge:

/**
 * Semantic memory: a knowledge base held in memory and keyed by entry id.
 *
 * NOTE(review): `isRelevant` and `relevanceScore` are not defined in this
 * snippet and must be supplied alongside it.
 */
class SemanticMemory {
  private knowledge: Map<string, Knowledge> = new Map();

  /** Inserts an entry, replacing any existing entry with the same id. */
  store(knowledge: Knowledge): void {
    this.knowledge.set(knowledge.id, knowledge);
  }

  /**
   * Returns the entries `isRelevant` accepts for `query`, highest
   * `relevanceScore` first.
   */
  retrieve(query: string): Knowledge[] {
    return Array.from(this.knowledge.values())
      .filter((entry) => this.isRelevant(entry, query))
      // Score each entry exactly once. Scoring inside the comparator would
      // re-run relevanceScore on every comparison the sort makes.
      .map((entry) => ({ entry, score: this.relevanceScore(entry, query) }))
      .sort((a, b) => b.score - a.score)
      .map(({ entry }) => entry);
  }
}

Memory Architectures

1. Hierarchical Memory

/**
 * Combines the four memory layers into a single context object for a query.
 *
 * NOTE(review): none of the four stores is assigned in this snippet; they
 * must be initialised by the surrounding code.
 */
class HierarchicalMemory {
  private shortTerm: ConversationMemory;
  private longTerm: LongTermMemory;
  private episodic: EpisodicMemory;
  private semantic: SemanticMemory;

  /**
   * Queries the long-term, episodic, semantic, and short-term stores, in that
   * order, and bundles the results.
   */
  async getContext(userId: string, query: string): Promise<Context> {
    // Persistent facts recalled for this user and query.
    const facts = await this.longTerm.recall(userId, query);

    // Recorded events matching the user and query.
    const matchingEvents = this.episodic.recallEvents({ userId, query });

    // Knowledge-base entries retrieved for the query.
    const relatedKnowledge = this.semantic.retrieve(query);

    // The current conversation window.
    const messages = this.shortTerm.getContext();

    const context: Context = {
      conversation: messages,
      longTerm: facts,
      episodes: matchingEvents,
      knowledge: relatedKnowledge,
    };
    return context;
  }
}

2. Vector Memory

Store memories as embeddings:

/**
 * Memory backed by a vector store: content is embedded on write and looked
 * up by embedding on read.
 *
 * NOTE(review): `embeddings` is never assigned and `embed` is not defined in
 * this snippet; both must be supplied by the surrounding code.
 */
class VectorMemory {
  private embeddings: VectorStore;

  /** Embeds the memory's content and adds the record to the vector store. */
  async store(memory: Memory): Promise<void> {
    const vector = await this.embed(memory.content);
    const record = {
      id: memory.id,
      embedding: vector,
      metadata: memory.metadata,
      content: memory.content,
    };
    await this.embeddings.add(record);
  }

  /**
   * Embeds `query`, searches the store for up to `limit` matches, and maps
   * each hit to a memory whose `relevance` is the hit's score.
   */
  async recall(query: string, limit = 5): Promise<Memory[]> {
    const queryVector = await this.embed(query);
    const hits = await this.embeddings.search(queryVector, limit);

    const memories: Memory[] = [];
    for (const hit of hits) {
      memories.push({
        id: hit.id,
        content: hit.content,
        metadata: hit.metadata,
        relevance: hit.score,
      });
    }
    return memories;
  }
}

Context Management Strategies

1. Sliding Window

Keep most recent context:

/**
 * Context strategy that keeps only the most recent messages.
 *
 * At most `maxMessages` messages are retained. A system prompt, when present
 * as the first message, is pinned and counts toward that limit.
 */
class SlidingWindowContext {
  private messages: Message[] = [];
  private maxMessages = 20;

  /** Appends a message and drops the oldest ones once the window is full. */
  addMessage(message: Message): void {
    this.messages.push(message);

    const total = this.messages.length;
    if (total <= this.maxMessages) {
      return;
    }

    const first = this.messages[0];
    if (first?.role === 'system') {
      // Keep the system prompt plus the newest (maxMessages - 1) messages.
      // Explicit start indices avoid the slice(-0) trap, where a window of 1
      // would select the whole array instead of nothing.
      const tailStart = Math.max(1, total - (this.maxMessages - 1));
      this.messages = [first, ...this.messages.slice(tailStart)];
    } else {
      // No system prompt to pin: keep the newest maxMessages messages.
      // Pinning index 0 here would hold on to the oldest user turn forever.
      this.messages = this.messages.slice(total - this.maxMessages);
    }
  }

  /**
   * Returns the current window, oldest message first.
   *
   * A copy is returned so callers cannot grow the window behind the class's
   * back.
   */
  getContext(): Message[] {
    return [...this.messages];
  }
}

2. Summarization

Summarize old context:

/**
 * Context strategy that keeps the newest messages verbatim and folds older
 * ones into a running text summary.
 *
 * NOTE(review): `summarize` is not defined in this snippet and must be
 * supplied alongside it; it is awaited and its result is used as text.
 */
class SummarizedContext {
  private recentMessages: Message[] = [];
  private summary = '';
  // Number of most-recent messages kept verbatim.
  private readonly recentLimit = 10;

  /**
   * Appends a message; once more than `recentLimit` messages are held, the
   * oldest overflow is summarised and appended to the running summary.
   *
   * If `summarize` rejects, the overflow is put back and the error is
   * rethrown, so no message is lost.
   */
  async addMessage(message: Message): Promise<void> {
    this.recentMessages.push(message);

    const overflow = this.recentMessages.length - this.recentLimit;
    if (overflow <= 0) {
      return;
    }

    // Detach the overflow BEFORE awaiting. If another addMessage call runs
    // while the summary is pending, trimming "to the last N" afterwards would
    // silently drop messages that were never summarised.
    const toSummarize = this.recentMessages.splice(0, overflow);

    let newSummary: string;
    try {
      newSummary = await this.summarize(toSummarize);
    } catch (error: unknown) {
      this.recentMessages.unshift(...toSummarize);
      throw error;
    }

    // Avoid a leading newline on the very first summary.
    this.summary = this.summary ? `${this.summary}\n${newSummary}` : newSummary;
  }

  /**
   * Returns the recent messages, preceded by a system message carrying the
   * summary when there is one.
   */
  getContext(): Message[] {
    if (!this.summary) {
      // Nothing summarised yet: do not emit an empty summary message.
      return [...this.recentMessages];
    }

    return [
      { role: 'system', content: `Previous conversation summary: ${this.summary}` },
      ...this.recentMessages,
    ];
  }
}

3. Relevance-Based Retrieval

Only include relevant memories:

/**
 * Context strategy that includes only the memories most relevant to a query.
 *
 * NOTE(review): `relevanceScore` is not defined in this snippet and must be
 * supplied alongside it; it is awaited and its result is compared
 * numerically.
 */
class RelevanceBasedContext {
  private allMemories: Memory[] = [];

  /** Adds a memory to the pool that `getContext` ranks. */
  addMemory(memory: Memory): void {
    this.allMemories.push(memory);
  }

  /**
   * Scores every stored memory against `query` and returns the best ones.
   *
   * @param query Text the memories are scored against.
   * @param limit Maximum number of memories to return (default 10).
   *   Fractions are rounded down; values below 1 yield no memories.
   * @returns The top memories, highest score first, together with the query.
   */
  async getContext(query: string, limit = 10): Promise<Context> {
    // Score all memories concurrently.
    const scored = await Promise.all(
      this.allMemories.map(async (memory) => ({
        memory,
        score: await this.relevanceScore(memory, query),
      }))
    );

    // Clamp so a negative limit cannot turn into "all but the last n".
    const topK = Math.max(0, Math.floor(limit));

    const relevant = scored
      .sort((a, b) => b.score - a.score)
      .slice(0, topK)
      .map((s) => s.memory);

    return {
      relevantMemories: relevant,
      query,
    };
  }
}

Implementation Example

Complete Memory System

/**
 * Facade over the individual memory stores: records each interaction in all
 * of them and assembles the context for the next model call.
 */
class AgentMemory {
  private conversation: ConversationMemory;
  private longTerm: LongTermMemory;
  private episodic: EpisodicMemory;
  private vectorStore: VectorMemory;
  // Client used for fact extraction. Optional so that existing
  // `new AgentMemory()` call sites keep working.
  private llm?: { generate(prompt: string): Promise<string> };

  /**
   * @param llm Client whose `generate(prompt)` resolves to the model's text
   *   reply. When omitted, interactions are still recorded but no facts are
   *   extracted from them.
   */
  constructor(llm?: { generate(prompt: string): Promise<string> }) {
    this.conversation = new ConversationMemory();
    this.longTerm = new LongTermMemory();
    this.episodic = new EpisodicMemory();
    this.vectorStore = new VectorMemory();
    this.llm = llm;
  }

  /**
   * Records one user/agent exchange: appends both messages to the
   * conversation, stores every extracted fact in long-term and vector
   * memory, and logs the exchange as an episode.
   */
  async processInteraction(
    userId: string,
    userMessage: string,
    agentResponse: string
  ): Promise<void> {
    // Store in conversation memory
    this.conversation.addMessage({
      role: 'user',
      content: userMessage,
    });
    this.conversation.addMessage({
      role: 'assistant',
      content: agentResponse,
    });

    // Extract and store facts
    const facts = await this.extractFacts(userMessage, agentResponse);
    // Deliberately sequential: LongTermMemory.remember loads the user's fact
    // list, appends, and saves it back, so overlapping calls for the same
    // user could overwrite each other's additions.
    for (const fact of facts) {
      await this.longTerm.remember(userId, fact);
      await this.vectorStore.store({
        id: generateId(),
        content: fact.content,
        metadata: { userId, timestamp: Date.now() },
      });
    }

    // Record episode
    this.episodic.recordEvent({
      userId,
      type: 'interaction',
      userMessage,
      agentResponse,
      timestamp: Date.now(),
    });
  }

  /**
   * Gathers the conversation window, the user's long-term and vector
   * memories for `query`, and the user's episodes.
   */
  async getContext(userId: string, query: string): Promise<AgentContext> {
    // Get conversation context
    const conversation = this.conversation.getContext();

    // The two lookups are independent, so run them concurrently.
    const [longTermMemories, recalled] = await Promise.all([
      this.longTerm.recall(userId, query),
      this.vectorStore.recall(query),
    ]);

    // The vector store is queried by text only, so it can return memories
    // written for other users. Keep only the ones tagged with this user's id
    // (processInteraction writes `metadata.userId` on every record).
    // NOTE(review): filtering after the top-k search can leave fewer results
    // than requested; a store-side filter would be better if available.
    const vectorMemories = recalled.filter((memory) => memory.metadata?.userId === userId);

    // Get recent episodes
    const episodes = this.episodic.recallEvents({ userId, limit: 5 });

    return {
      conversation,
      longTermMemories,
      vectorMemories,
      episodes,
    };
  }

  /**
   * Asks the LLM for the facts stated in one exchange.
   *
   * @returns The extracted facts, or an empty array when no LLM client is
   *   configured or the reply contains no usable JSON array.
   */
  private async extractFacts(
    userMessage: string,
    agentResponse: string
  ): Promise<Fact[]> {
    if (!this.llm) {
      return [];
    }

    // The key is "content" because that is the field processInteraction
    // reads from each fact.
    const prompt = `
Extract factual information from this conversation:

User: ${userMessage}
Agent: ${agentResponse}

Extract facts in JSON format:
[{ "content": "...", "confidence": 0.0-1.0 }]
`;

    const response = await this.llm.generate(prompt);
    return this.parseFacts(response);
  }

  /** Pulls the JSON array out of an LLM reply and keeps entries that carry text. */
  private parseFacts(response: string): Fact[] {
    // Models often wrap JSON in prose or code fences; parse only the
    // outermost [...] span.
    const start = response.indexOf('[');
    const end = response.lastIndexOf(']');
    if (start === -1 || end <= start) {
      return [];
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(response.slice(start, end + 1));
    } catch (error: unknown) {
      // A malformed reply should not abort recording the interaction.
      console.warn('AgentMemory: could not parse extracted facts', error);
      return [];
    }

    if (!Array.isArray(parsed)) {
      return [];
    }

    return parsed.flatMap((item): Fact[] => {
      // Also accept the "fact" key, which the earlier prompt wording asked for.
      const content = item?.content ?? item?.fact;
      return typeof content === 'string' && content.trim() !== ''
        ? [{ ...item, content }]
        : [];
    });
  }
}

Memory Optimization

1. Compression

/**
 * Outcome of a compression pass.
 *
 * `summaries` is typed loosely because the summariser that produces its
 * elements is not part of this snippet.
 */
interface CompressionResult {
  summaries: unknown[];
  originalCount: number;
  compressedCount: number;
}

/**
 * Shrinks a set of memories by clustering related ones and summarising each
 * cluster.
 *
 * NOTE(review): `clusterMemories` and `summarizeCluster` are not defined in
 * this snippet and must be supplied alongside it.
 */
class CompressedMemory {
  /**
   * @param memories Memories to compress.
   * @returns One summary per cluster, plus the input and output counts. The
   *   result is a plain data object, not a `CompressedMemory` instance.
   */
  async compress(memories: Memory[]): Promise<CompressionResult> {
    if (memories.length === 0) {
      // Nothing to cluster or summarise.
      return { summaries: [], originalCount: 0, compressedCount: 0 };
    }

    // Group related memories
    const clusters = this.clusterMemories(memories);

    // Summarize each cluster (concurrently)
    const summaries = await Promise.all(
      clusters.map((cluster) => this.summarizeCluster(cluster))
    );

    return {
      summaries,
      originalCount: memories.length,
      compressedCount: summaries.length,
    };
  }
}

2. Forgetting

/**
 * Prunes a user's stored memories by age or by relevance.
 *
 * NOTE(review): `getAllMemories`, `saveMemories`, and `calculateRelevance`
 * are not defined in this snippet and must be supplied alongside it.
 */
class ForgettingMemory {
  /**
   * Deletes the user's memories that are older than the given number of days.
   *
   * A memory's age is read from `timestamp`, falling back to
   * `metadata.timestamp`. Memories with neither are kept, since their age
   * cannot be established.
   *
   * @throws RangeError if `olderThanDays` is negative or not a finite number.
   */
  async forgetOld(userId: string, olderThanDays: number): Promise<void> {
    // Without this guard a NaN or negative argument makes every
    // `timestamp > cutoff` comparison false and silently wipes the store.
    if (!Number.isFinite(olderThanDays) || olderThanDays < 0) {
      throw new RangeError(
        `olderThanDays must be a non-negative finite number, got ${olderThanDays}`
      );
    }

    const msPerDay = 24 * 60 * 60 * 1000;
    const cutoff = Date.now() - olderThanDays * msPerDay;

    const memories = await this.getAllMemories(userId);
    const toKeep = memories.filter((m) => {
      const timestamp = m.timestamp ?? m.metadata?.timestamp;
      return typeof timestamp !== 'number' || timestamp > cutoff;
    });

    await this.saveMemories(userId, toKeep);
  }

  /**
   * Deletes the user's memories whose relevance is not above the threshold.
   *
   * @param minRelevance Memories scoring at or below this value are removed
   *   (default 0.3).
   */
  async forgetIrrelevant(userId: string, minRelevance = 0.3): Promise<void> {
    const memories = await this.getAllMemories(userId);

    // Score relevance (concurrently)
    const scored = await Promise.all(
      memories.map(async (m) => ({
        memory: m,
        relevance: await this.calculateRelevance(m, userId),
      }))
    );

    // Keep only relevant memories
    const relevant = scored
      .filter((s) => s.relevance > minRelevance)
      .map((s) => s.memory);

    await this.saveMemories(userId, relevant);
  }
}

Best Practices

  1. Layer memory: Use multiple memory types
  2. Relevance filtering: Only include relevant memories
  3. Compression: Summarize old context
  4. Forgetting: Remove stale or irrelevant memories
  5. Vector search: Use embeddings for semantic search
  6. Context limits: Respect token limits
  7. Update frequently: Keep memory current

Conclusion

Effective memory systems enable agents to:

  • Remember: Recall past interactions
  • Learn: Build on previous knowledge
  • Contextualize: Understand current situation
  • Personalize: Adapt to individual users

Key principles:

  • Multiple memory types: Short-term, long-term, episodic, semantic
  • Relevance-based retrieval: Only include what matters
  • Efficient storage: Compress and forget appropriately
  • Vector search: Semantic similarity for retrieval

Build proper memory systems, and your agents will feel truly intelligent.