[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"skill-4cd27c01-b56e-4cde-8aa9-c1ce0c10d8f2":3,"$f9bb-3RGIbRT4AlLYj19o177npUzp2b49-91QOmh_9IU":42},{"id":4,"title":5,"description":6,"categoryId":7,"moduleId":8,"tags":9,"prompt":10,"icon":11,"source":12,"sourceUrl":13,"authorId":14,"authorName":15,"isPublic":16,"stars":17,"runs":18,"createdAt":19,"updatedAt":19,"module":20,"category":27,"packages":33},"4cd27c01-b56e-4cde-8aa9-c1ce0c10d8f2","context-window-management","管理LLM上下文窗口的策略包括","cat_coding_backend","mod_coding","sickn33,coding","---\nname: context-window-management\ndescription: Strategies for managing LLM context windows including\n  summarization, trimming, routing, and avoiding context rot\nrisk: unknown\nsource: vibeship-spawner-skills (Apache 2.0)\ndate_added: 2026-02-27\n---\n\n# Context Window Management\n\nStrategies for managing LLM context windows including summarization, trimming, routing, and avoiding context rot\n\n## Capabilities\n\n- context-engineering\n- context-summarization\n- context-trimming\n- context-routing\n- token-counting\n- context-prioritization\n\n## Prerequisites\n\n- Knowledge: LLM fundamentals, Tokenization basics, Prompt engineering\n- Skills_recommended: prompt-engineering\n\n## Scope\n\n- Does_not_cover: RAG implementation details, Model fine-tuning, Embedding models\n- Boundaries: Focus is context optimization, Covers strategies not specific implementations\n\n## Ecosystem\n\n### Primary_tools\n\n- tiktoken - OpenAI's tokenizer for counting tokens\n- LangChain - Framework with context management utilities\n- Claude API - 200K+ context with caching support\n\n## Patterns\n\n### Tiered Context Strategy\n\nDifferent strategies based on context size\n\n**When to use**: Building any multi-turn conversation system\n\ninterface ContextTier {\n    maxTokens: number;\n    strategy: 'full' | 'summarize' | 'rag';\n    model: string;\n}\n\nconst TIERS: ContextTier[] = [\n    { maxTokens: 8000, strategy: 'full', model: 'claude-3-haiku' },\n    { maxTokens: 32000, strategy: 'full', model: 'claude-3-5-sonnet' },\n    { maxTokens: 100000, strategy: 'summarize', model: 'claude-3-5-sonnet' },\n    { maxTokens: Infinity, strategy: 'rag', model: 'claude-3-5-sonnet' }\n];\n\nasync function selectStrategy(messages: Message[]): ContextTier {\n    const tokens = await countTokens(messages);\n\n    for (const tier of TIERS) {\n        if (tokens \u003C= tier.maxTokens) {\n            return tier;\n        }\n    }\n    return TIERS[TIERS.length - 1];\n}\n\nasync function prepareContext(messages: Message[]): PreparedContext {\n    const tier = await selectStrategy(messages);\n\n    switch (tier.strategy) {\n        case 'full':\n            return { messages, model: tier.model };\n\n        case 'summarize':\n            const summary = await summarizeOldMessages(messages);\n            return { messages: [summary, ...recentMessages(messages)], model: tier.model };\n\n        case 'rag':\n            const relevant = await retrieveRelevant(messages);\n            return { messages: [...relevant, ...recentMessages(messages)], model: tier.model };\n    }\n}\n\n### Serial Position Optimization\n\nPlace important content at start and end\n\n**When to use**: Constructing prompts with significant context\n\n\u002F\u002F LLMs weight beginning and end more heavily\n\u002F\u002F Structure prompts to leverage this\n\nfunction buildOptimalPrompt(components: {\n    systemPrompt: string;\n    criticalContext: string;\n    conversationHistory: Message[];\n    currentQuery: string;\n}): string {\n    \u002F\u002F START: System instructions (always first)\n    const parts = [components.systemPrompt];\n\n    \u002F\u002F CRITICAL CONTEXT: Right after system (high primacy)\n    if (components.criticalContext) {\n        parts.push(`## Key Context\\n${components.criticalContext}`);\n    }\n\n    \u002F\u002F MIDDLE: Conversation history (lower weight)\n    \u002F\u002F Summarize if long, keep recent messages full\n    const history = components.conversationHistory;\n    if (history.length > 10) {\n        const oldSummary = summarize(history.slice(0, -5));\n        const recent = history.slice(-5);\n        parts.push(`## Earlier Conversation (Summary)\\n${oldSummary}`);\n        parts.push(`## Recent Messages\\n${formatMessages(recent)}`);\n    } else {\n        parts.push(`## Conversation\\n${formatMessages(history)}`);\n    }\n\n    \u002F\u002F END: Current query (high recency)\n    \u002F\u002F Restate critical requirements here\n    parts.push(`## Current Request\\n${components.currentQuery}`);\n\n    \u002F\u002F FINAL: Reminder of key constraints\n    parts.push(`Remember: ${extractKeyConstraints(components.systemPrompt)}`);\n\n    return parts.join('\\n\\n');\n}\n\n### Intelligent Summarization\n\nSummarize by importance, not just recency\n\n**When to use**: Context exceeds optimal size\n\ninterface MessageWithMetadata extends Message {\n    importance: number;  \u002F\u002F 0-1 score\n    hasCriticalInfo: boolean;  \u002F\u002F User preferences, decisions\n    referenced: boolean;  \u002F\u002F Was this referenced later?\n}\n\nasync function smartSummarize(\n    messages: MessageWithMetadata[],\n    targetTokens: number\n): Message[] {\n    \u002F\u002F Sort by importance, preserve order for tied scores\n    const sorted = [...messages].sort((a, b) =>\n        (b.importance + (b.hasCriticalInfo ? 0.5 : 0) + (b.referenced ? 0.3 : 0)) -\n        (a.importance + (a.hasCriticalInfo ? 0.5 : 0) + (a.referenced ? 0.3 : 0))\n    );\n\n    const keep: Message[] = [];\n    const summarizePool: Message[] = [];\n    let currentTokens = 0;\n\n    for (const msg of sorted) {\n        const msgTokens = await countTokens([msg]);\n        if (currentTokens + msgTokens \u003C targetTokens * 0.7) {\n            keep.push(msg);\n            currentTokens += msgTokens;\n        } else {\n            summarizePool.push(msg);\n        }\n    }\n\n    \u002F\u002F Summarize the low-importance messages\n    if (summarizePool.length > 0) {\n        const summary = await llm.complete(`\n            Summarize these messages, preserving:\n            - Any user preferences or decisions\n            - Key facts that might be referenced later\n            - The overall flow of conversation\n\n            Messages:\n            ${formatMessages(summarizePool)}\n        `);\n\n        keep.unshift({ role: 'system', content: `[Earlier context: ${summary}]` });\n    }\n\n    \u002F\u002F Restore original order\n    return keep.sort((a, b) => a.timestamp - b.timestamp);\n}\n\n### Token Budget Allocation\n\nAllocate token budget across context components\n\n**When to use**: Need predictable context management\n\ninterface TokenBudget {\n    system: number;      \u002F\u002F System prompt\n    criticalContext: number;  \u002F\u002F User prefs, key info\n    history: number;     \u002F\u002F Conversation history\n    query: number;       \u002F\u002F Current query\n    response: number;    \u002F\u002F Reserved for response\n}\n\nfunction allocateBudget(totalTokens: number): TokenBudget {\n    return {\n        system: Math.floor(totalTokens * 0.10),      \u002F\u002F 10%\n        criticalContext: Math.floor(totalTokens * 0.15),  \u002F\u002F 15%\n        history: Math.floor(totalTokens * 0.40),     \u002F\u002F 40%\n        query: Math.floor(totalTokens * 0.10),       \u002F\u002F 10%\n        response: Math.floor(totalTokens * 0.25),    \u002F\u002F 25%\n    };\n}\n\nasync function buildWithBudget(\n    components: ContextComponents,\n    modelMaxTokens: number\n): PreparedContext {\n    const budget = allocateBudget(modelMaxTokens);\n\n    \u002F\u002F Truncate\u002Fsummarize each component to fit budget\n    const prepared = {\n        system: truncateToTokens(components.system, budget.system),\n        criticalContext: truncateToTokens(\n            components.criticalContext, budget.criticalContext\n        ),\n        history: await summarizeToTokens(components.history, budget.history),\n        query: truncateToTokens(components.query, budget.query),\n    };\n\n    \u002F\u002F Reallocate unused budget\n    const used = await countTokens(Object.values(prepared).join('\\n'));\n    const remaining = modelMaxTokens - used - budget.response;\n\n    if (remaining > 0) {\n        \u002F\u002F Give extra to history (most valuable for conversation)\n        prepared.history = await summarizeToTokens(\n            components.history,\n            budget.history + remaining\n        );\n    }\n\n    return prepared;\n}\n\n## Validation Checks\n\n### No Token Counting\n\nSeverity: WARNING\n\nMessage: Building context without token counting. May exceed model limits.\n\nFix action: Count tokens before sending, implement budget allocation\n\n### Naive Message Truncation\n\nSeverity: WARNING\n\nMessage: Truncating messages without summarization. Critical context may be lost.\n\nFix action: Summarize old messages instead of simply removing them\n\n### Hardcoded Token Limit\n\nSeverity: INFO\n\nMessage: Hardcoded token limit. Consider making configurable per model.\n\nFix action: Use model-specific limits from configuration\n\n### No Context Management Strategy\n\nSeverity: WARNING\n\nMessage: LLM calls without context management strategy.\n\nFix action: Implement context management: budgets, summarization, or RAG\n\n## Collaboration\n\n### Delegation Triggers\n\n- retrieval|rag|search -> rag-implementation (Need retrieval system)\n- memory|persistence|remember -> conversation-memory (Need memory storage)\n- cache|caching -> prompt-caching (Need caching optimization)\n\n### Complete Context System\n\nSkills: context-window-management, rag-implementation, conversation-memory, prompt-caching\n\nWorkflow:\n\n```\n1. Design context strategy\n2. Implement RAG for large corpuses\n3. Set up memory persistence\n4. Add caching for performance\n```\n\n## Related Skills\n\nWorks well with: `rag-implementation`, `conversation-memory`, `prompt-caching`, `llm-npc-dialogue`\n\n## When to Use\n- User mentions or implies: context window\n- User mentions or implies: token limit\n- User mentions or implies: context management\n- User mentions or implies: context engineering\n- User mentions or implies: long context\n- User mentions or implies: context overflow\n\n## Limitations\n- Use this skill only when the task clearly matches the scope described above.\n- Do not treat the output as a substitute for environment-specific validation, testing, or expert review.\n- Stop and ask for clarification if required inputs, permissions, safety boundaries, or success criteria are missing.\n","","imported","https:\u002F\u002Fgithub.com\u002Fsickn33\u002Fantigravity-awesome-skills","user_system_seed","SkillOPIC",true,221,466,"2026-05-16 13:13:11",{"id":8,"name":21,"slug":22,"icon":23,"description":24,"sort":25,"createdAt":26},"编程开发","coding","mdi-code-braces","代码生成、调试、审查，提升开发效率",2,"2026-05-16 12:53:40",{"id":7,"name":28,"slug":29,"icon":30,"description":31,"moduleId":8,"sort":25,"skillCount":32,"createdAt":26},"后端开发","backend","mdi-server","API、数据库、服务端架构",296,[34],{"id":35,"skillId":4,"version":36,"fileName":37,"fileSize":38,"filePath":39,"fileHash":40,"manifest":41,"createdAt":19},"8540379b-bfab-45e3-a3ff-618e6fc9140b","1.0.0","context-window-management.zip",3442,"uploads\u002Fskills\u002F4cd27c01-b56e-4cde-8aa9-c1ce0c10d8f2\u002Fcontext-window-management.zip","c71b535a93c83ac93a5e20feb53fea5b5e5984896a9bbe0715f38b1ce048ba44","[{\"path\":\"SKILL.md\",\"isDirectory\":false,\"size\":9466}]",{"code":43,"message":44,"data":45},200,"success",{"items":46,"stats":47,"page":50},[],{"averageRating":48,"totalRatings":48,"ratingCounts":49},0,[48,48,48,48,48],{"limit":51,"offset":48,"hasMore":52,"nextOffset":51,"ratedOnly":16},15,false]