# LangChain 框架在大项目中的“长期记忆”之痛

## 前言

用 LangChain 做原型很快,但做大项目就会遇到一个很尴尬的问题:长期记忆怎么搞?ConversationBufferMemory 越堆越多、SummaryMemory 压缩丢信息、VectorStoreMemory 检索不准确。各种方案都试过,各有各的问题。今天聊聊 LangChain 长期记忆的工程化挑战。

## 一、 底层原理

### 1.1 LangChain 记忆的层次结构

LangChain 的记忆模块是一个层次化的架构:

```mermaid
graph TD
    A[ConversationBufferMemory] --> B[全部保存]
    B --> C[Token 超限]
    C --> D[OOM 或截断]
    E[ConversationSummaryMemory] --> F[自动压缩]
    F --> G[丢失细节]
    H[VectorStoreMemory] --> I[向量检索]
    I --> J[检索不准确]
    K[CombinedMemory] --> L[混合使用]
    L --> M[配置复杂]
```

核心问题:

- Buffer 无限增长
- Summary 压缩太多
- Vector 检索不准
- Combined 太复杂

### 1.2 记忆方案对比

| 方案 | 容量 | 精确度 | 性能 | 复杂度 |
| --- | --- | --- | --- | --- |
| BufferMemory | 小 | 高 | 差 | 低 |
| SummaryMemory | 中 | 中 | 中 | 中 |
| VectorStoreMemory | 大 | 中 | 中 | 高 |
| CombinedMemory | 大 | 高 | 差 | 高 |

## 二、 快速上手

### 2.1 基础记忆

```python
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()
memory.chat_memory.add_user_message("你好")
memory.chat_memory.add_ai_message("你好")
print(memory.load_memory_variables({}))
```

### 2.2 带摘要的记忆

```python
from langchain.memory import ConversationSummaryMemory
from langchain.llms import OpenAI

memory = ConversationSummaryMemory(llm=OpenAI())
memory.save_context({"input": "你好"}, {"output": "你好"})
memory.save_context({"input": "今天天气怎么样"}, {"output": "今天是晴天。"})
print(memory.load_memory_variables({}))
```

## 三、 核心 API / 深水区

### 3.1 LangChain 记忆问题速查

| 问题 | 表现 | 解决方案 |
| --- | --- | --- |
| Token 超限 | 内存溢出 | 自动压缩 |
| 丢失细节 | 摘要不全 | 分层记忆 |
| 检索不准 | 答非所问 | 混合检索 |
| 性能差 | 响应慢 | 缓存机制 |

### 3.2 自定义持久化记忆

```python
import json
import os
from langchain.memory import BaseMemory


class FileMemory(BaseMemory):
    def __init__(self, file_path="memory.json"):
        self.file_path = file_path
        self.memory = self._load()

    def _load(self):
        if os.path.exists(self.file_path):
            with open(self.file_path) as f:
                return json.load(f)
        return {"history": []}

    def save(self):
        with open(self.file_path, "w") as f:
            json.dump(self.memory, f, ensure_ascii=False)

    @property
    def memory_variables(self):
        return ["history"]

    def load_memory_variables(self, inputs):
        return self.memory

    def save_context(self, inputs, outputs):
        self.memory["history"].append({
            "input": inputs.get("input", ""),
            "output": outputs.get("output", "")
        })
        self.save()
        if len(self.memory["history"]) > 100:
            self._compress()

    def _compress(self):
        self.memory["history"] = self.memory["history"][-50:]
```

### 3.3 分层记忆实现

```python
class LayeredMemory:
    def __init__(self, short_term_size=20, long_term_cap=1000):
        self.short_term = []
        self.long_term = []
        self.short_term_size = short_term_size

    def add(self, interaction: dict):
        self.short_term.append(interaction)
        if len(self.short_term) > self.short_term_size:
            self._archive()

    def _archive(self):
        oldest = self.short_term.pop(0)
        self.long_term.append(oldest)
        if len(self.long_term) > 1000:
            self.long_term = self.long_term[-500:]

    def get_context(self) -> str:
        context = []
        for item in self.short_term[-10:]:
            context.append(f"用户: {item['input']}")
            context.append(f"助手: {item['output']}")
        return "\n".join(context)
```

## 四、 实战演练

### 4.1 完整的 LangChain 记忆系统

```python
from typing import Dict, List, Any, Optional
import json
import time


class EngineeringMemory:
    def __init__(self, llm, persist_path="memory.json"):
        self.llm = llm
        self.persist_path = persist_path
        self.short_term: List[Dict] = []
        self.summaries: List[str] = []
        self._load()

    def add_interaction(self, user_input: str, output: str):
        interaction = {
            "user": user_input,
            "assistant": output,
            "timestamp": time.time()
        }
        self.short_term.append(interaction)
        if len(self.short_term) > 20:
            self._summarize()
        self._save()

    def _summarize(self):
        old = self.short_term[:-10]
        if old:
            text = "\n".join(f"U: {i['user']}\nA: {i['assistant']}" for i in old)
            summary = self.llm(f"总结: {text[:1000]}")
            self.summaries.append(summary)
            self.short_term = self.short_term[-10:]

    def get_context(self) -> str:
        parts = []
        if self.summaries:
            parts.append("【历史摘要】")
            parts.extend(self.summaries[-3:])
        if self.short_term:
            parts.append("【最近对话】")
            for i in self.short_term[-5:]:
                parts.append(f"用户: {i['user']}")
                parts.append(f"助手: {i['assistant']}")
        return "\n".join(parts)

    def _save(self):
        data = {"short_term": self.short_term, "summaries": self.summaries}
        with open(self.persist_path, "w") as f:
            json.dump(data, f, ensure_ascii=False)

    def _load(self):
        try:
            with open(self.persist_path) as f:
                data = json.load(f)
            self.short_term = data.get("short_term", [])
            self.summaries = data.get("summaries", [])
        except:
            pass


memory = EngineeringMemory(llm)
for i in range(50):
    memory.add_interaction(f"第{i}次提问", f"第{i}次回复")
print(memory.get_context()[:500])
```

## 五、 避坑指南与最佳实践

- **技巧**:BufferMemory 不要单用,一定要加摘要或者分层的机制。
- ⚠️ **警告**:压缩次数太多会丢细节,保存原始数据,摘要只是索引。
- ✅ **推荐**:持久化到文件或数据库,进程重启后记忆不丢失。

## 六、 综合实战演示

### 6.1 生产级 LangChain 记忆

```python
from typing import Dict, List, Optional
from datetime import datetime


class LangChainPersistentMemory:
    def __init__(self, llm, user_id: str):
        self.llm = llm
        self.user_id = user_id
        self.short_memory: List[Dict] = []
        self.long_memory: Dict[str, str] = {}

    def add(self, role: str, content: str):
        msg = {"role": role, "content": content, "time": datetime.now().isoformat()}
        self.short_memory.append(msg)
        if len(self.short_memory) > 10:
            self._compress()

    def _compress(self):
        to_compress = self.short_memory[:-5]
        if not to_compress:
            return
        text = "\n".join(m["content"] for m in to_compress)
        summary = self.llm(f"总结: {text}")
        key = f"summary_{len(self.long_memory)}"
        self.long_memory[key] = summary
        self.short_memory = self.short_memory[-5:]

    def get_context(self, token_limit=3000) -> str:
        parts = []
        for k, v in self.long_memory.items():
            part = f"[历史] {v}"
            parts.append(part)
        for m in self.short_memory:
            part = f"{m['role']}: {m['content']}"
            parts.append(part)
        return "\n".join(parts)


memory = LangChainPersistentMemory(llm, "user_001")
for i in range(15):
    memory.add("user", f"问题{i}")
    memory.add("assistant", f"回答{i}")
print(memory.get_context())
```

## 总结

LangChain 长期记忆的要点:

- 不要单用 BufferMemory
- 分层记忆架构
- 自动摘要压缩
- 持久化存储