From e3a0147570e5a60b2fa6709d5b67fe999316aaa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Te=C3=AFlo=20M?= Date: Thu, 7 Nov 2024 12:38:29 +0100 Subject: [PATCH 1/2] Add Claude caching --- package-lock.json | 75 +++++++++++++++++++++------------------------ settings.example.js | 7 ++++- src/ClaudeClient.js | 35 ++++++++++++++++----- 3 files changed, 68 insertions(+), 49 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4c9e238..0604ac4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "@socketteer/bingleton-api", + "name": "@socketteer/clooi", "version": "0.9", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "@socketteer/bingleton-api", + "name": "@socketteer/clooi", "version": "0.9", "license": "MIT", "dependencies": { @@ -828,11 +828,11 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -1062,9 +1062,9 @@ "dev": true }, "node_modules/cookie": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", - "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==", + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", "engines": { "node": ">= 0.6" } @@ -1882,9 +1882,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dependencies": { "to-regex-range": "^5.0.1" }, @@ -1893,13 +1893,13 @@ } }, "node_modules/find-my-way": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/find-my-way/-/find-my-way-8.1.0.tgz", - "integrity": "sha512-41QwjCGcVTODUmLLqTMeoHeiozbMXYMAE1CKFiDyi9zVZ2Vjh0yz3MF0WQZoIb+cmzP/XlbFjlF2NtJmvZHznA==", + "version": "8.2.2", + "resolved": "https://registry.npmjs.org/find-my-way/-/find-my-way-8.2.2.tgz", + "integrity": "sha512-Dobi7gcTEq8yszimcfp/R7+owiT4WncAJ7VTTgFH1jYJ5GaG1FbhjwDG820hptN0QDFvzVY3RfCzdInvGPGzjA==", "dependencies": { "fast-deep-equal": "^3.1.3", "fast-querystring": "^1.0.0", - "safe-regex2": "^2.0.0" + "safe-regex2": "^3.1.0" }, "engines": { "node": ">=14" @@ -3157,20 +3157,15 @@ } }, "node_modules/light-my-request": { - "version": "5.11.0", - "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-5.11.0.tgz", - "integrity": "sha512-qkFCeloXCOMpmEdZ/MV91P8AT4fjwFXWaAFz3lUeStM8RcoM1ks4J/F8r1b3r6y/H4u3ACEJ1T+Gv5bopj7oDA==", + "version": "5.14.0", + "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-5.14.0.tgz", + "integrity": "sha512-aORPWntbpH5esaYpGOOmri0OHDOe3wC5M2MQxZ9dvMLZm6DnaAn0kJlcbU9hwsQgLzmZyReKwFwwPkR+nHu5kA==", "dependencies": { - "cookie": "^0.5.0", - "process-warning": "^2.0.0", + "cookie": "^0.7.0", + "process-warning": "^3.0.0", "set-cookie-parser": "^2.4.1" } }, - "node_modules/light-my-request/node_modules/process-warning": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-2.3.2.tgz", - "integrity": "sha512-n9wh8tvBe5sFmsqlg+XQhaQLumwpqoAUruLwjCopgTmUBjJ/fjtBsJzKleCaIGBOMXYEhp1YfKl4d7rJ5ZKJGA==" - }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -4057,11 +4052,11 @@ } }, "node_modules/ret": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/ret/-/ret-0.2.2.tgz", - "integrity": "sha512-M0b3YWQs7R3Z917WRQy1HHA7Ba7D8hvZg6UE5mLykJxQVE2ju0IXbGlaHPPlkY+WN7wFP+wUMXmBFA0aV6vYGQ==", + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/ret/-/ret-0.4.3.tgz", + "integrity": "sha512-0f4Memo5QP7WQyUEAYUO3esD/XjOc3Zjjg5CPsAq1p8sIu0XPeMbHJemKA0BO7tV0X7+A0FoEpbmHXWxPyD3wQ==", "engines": { - "node": ">=4" + "node": ">=10" } }, "node_modules/reusify": { @@ -4192,11 +4187,11 @@ } }, "node_modules/safe-regex2": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/safe-regex2/-/safe-regex2-2.0.0.tgz", - "integrity": "sha512-PaUSFsUaNNuKwkBijoAPHAK6/eM6VirvyPWlZ7BAQy4D+hCvh4B6lIG+nPdhbFfIbP+gTGBcrdsOaUs0F+ZBOQ==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/safe-regex2/-/safe-regex2-3.1.0.tgz", + "integrity": "sha512-RAAZAGbap2kBfbVhvmnTFv73NWLMvDGOITFYTZBAaY8eR+Ir4ef7Up/e7amo+y1+AH+3PtLkrt9mvcTsG9LXug==", "dependencies": { - "ret": "~0.2.0" + "ret": "~0.4.0" } }, "node_modules/safe-stable-stringify": { @@ -4724,9 +4719,9 @@ "dev": true }, "node_modules/undici": { - "version": "5.28.3", - "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz", - "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==", + "version": "5.28.4", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", + "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", "dependencies": { "@fastify/busboy": "^2.0.0" }, @@ -4901,9 +4896,9 @@ "dev": true }, "node_modules/ws": { - "version": "8.16.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz", - "integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==", + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", + "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", "engines": { "node": ">=10.0.0" }, diff --git a/settings.example.js b/settings.example.js index 73c06e5..f2f6c8a 100644 --- a/settings.example.js +++ b/settings.example.js @@ -68,7 +68,7 @@ export default { }, claudeOptions: { modelOptions: { - model: 'claude-3-opus-20240229', + model: 'claude-3-opus-20240229', // 'claude-3-5-sonnet-20241022' max_tokens: 4096, temperature: 1, stream: true, @@ -77,6 +77,11 @@ export default { systemMessage: '', // fs.readFileSync('./contexts/waluigiASCII.txt', 'utf8'), n: 2, }, + // Add cache options + cacheOptions: { + enabled: true, // users can set this to true to enable caching + minTokens: 1024, // minimum tokens required for caching (1024 for Sonnet/Opus, 2048 for Haiku) + }, }, infrastructOptions: { modelOptions: { diff --git a/src/ClaudeClient.js b/src/ClaudeClient.js index 057bb15..f85e104 100644 --- a/src/ClaudeClient.js +++ b/src/ClaudeClient.js @@ -62,9 +62,11 @@ export default class ClaudeClient extends ChatClient { } getHeaders() { - let anthropicBeta + let anthropicBeta; if ('steering' in this.options && this.options.steering) { anthropicBeta = 'steering-2024-06-04'; + } else if (this.options?.cacheOptions?.enabled || this.cache?.enabled) { + anthropicBeta = 'prompt-caching-2024-07-31'; } else { anthropicBeta = 'messages-2023-12-15'; } @@ -114,22 +116,39 @@ export default class ClaudeClient extends ChatClient { } buildApiParams(userMessage = null, previousMessages = [], systemMessage = null) { - // const maxHistoryLength = 20; const { messages: history, system } = super.buildApiParams(userMessage, previousMessages, systemMessage); - // merge all consecutive messages from the same author const mergedMessageHistory = []; let lastMessage = null; + const cacheEnabled = this.options?.cacheOptions?.enabled || this.cache?.enabled || false; + for (const message of history) { if (lastMessage && lastMessage.role === message.role) { - lastMessage.content += `${message.content}`; + const lastContent = lastMessage.content[lastMessage.content.length - 1]; + lastContent.text += message.content; } else { - lastMessage = message; - mergedMessageHistory.push(message); + const messageWithCache = { + role: message.role, + content: [{ + type: 'text', + text: message.content, + ...(cacheEnabled && { cache_control: { type: 'ephemeral' } }) + }] + }; + lastMessage = messageWithCache; + mergedMessageHistory.push(messageWithCache); } } + + const systemWithCache = system ? [{ + type: 'text', + text: system, + ...(cacheEnabled && { cache_control: { type: 'ephemeral' } }) + }] : undefined; + return { - messages: mergedMessageHistory, //.slice(-maxHistoryLength), - ...(system ? { system } : {}), + messages: mergedMessageHistory, + ...(systemWithCache ? { system: systemWithCache } : {}) }; } } + From 1a376e46b74aa2b1e3704af53a92f27d431018a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Te=C3=AFlo=20M?= Date: Thu, 7 Nov 2024 13:22:13 +0100 Subject: [PATCH 2/2] add dynamic block caching --- src/ClaudeClient.js | 100 ++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 32 deletions(-) diff --git a/src/ClaudeClient.js b/src/ClaudeClient.js index f85e104..31932fe 100644 --- a/src/ClaudeClient.js +++ b/src/ClaudeClient.js @@ -1,4 +1,5 @@ import ChatClient from './ChatClient.js'; +import { getMessagesForConversation, getChildren, getParent } from './conversation.js'; const CLAUDE_MODEL_INFO = { default: { @@ -116,39 +117,74 @@ export default class ClaudeClient extends ChatClient { } buildApiParams(userMessage = null, previousMessages = [], systemMessage = null) { - const { messages: history, system } = super.buildApiParams(userMessage, previousMessages, systemMessage); - const mergedMessageHistory = []; - let lastMessage = null; - const cacheEnabled = this.options?.cacheOptions?.enabled || this.cache?.enabled || false; - - for (const message of history) { - if (lastMessage && lastMessage.role === message.role) { - const lastContent = lastMessage.content[lastMessage.content.length - 1]; - lastContent.text += message.content; - } else { - const messageWithCache = { - role: message.role, - content: [{ - type: 'text', - text: message.content, - ...(cacheEnabled && { cache_control: { type: 'ephemeral' } }) - }] - }; - lastMessage = messageWithCache; - mergedMessageHistory.push(messageWithCache); - } - } + const { messages: history, system } = super.buildApiParams(userMessage, previousMessages, systemMessage); + const mergedMessageHistory = []; + let lastMessage = null; + const cacheEnabled = this.options?.cacheOptions?.enabled || this.cache?.enabled || false; + const MAX_CACHE_BLOCKS = 4; + + // Check if we're at a point where conversation branches into multiple paths + const currentBranch = getMessagesForConversation(previousMessages, userMessage?.parentMessageId); + const isBranchingPoint = previousMessages.length > 0 && + getChildren(previousMessages, previousMessages[previousMessages.length - 1].id).length > 1; - const systemWithCache = system ? [{ - type: 'text', - text: system, - ...(cacheEnabled && { cache_control: { type: 'ephemeral' } }) - }] : undefined; + // Find last two points where conversation branched into multiple paths + const branchPoints = []; + let messageId = userMessage?.parentMessageId; + while (messageId && branchPoints.length < 2) { + if (getChildren(previousMessages, messageId).length > 1) { + branchPoints.push(messageId); + } + messageId = getParent(previousMessages, messageId)?.id; + } - return { - messages: mergedMessageHistory, - ...(systemWithCache ? { system: systemWithCache } : {}) - }; + // Determine if current message should be a cache checkpoint + const shouldCacheMessage = (message, index) => { + if (!cacheEnabled) return false; + + if (branchPoints.includes(message.id)) return true; + if (isBranchingPoint && index === mergedMessageHistory.length - 1) return true; + + const usedCacheBlocks = branchPoints.length + (isBranchingPoint ? 1 : 0); + const remainingBlocks = MAX_CACHE_BLOCKS - usedCacheBlocks; + if (remainingBlocks > 0) { + return currentBranch.length >= 10 && currentBranch.length % 10 === 0; + } + + return false; + }; + + for (const [index, message] of history.entries()) { + if (lastMessage && lastMessage.role === message.role) { + const lastContent = lastMessage.content[lastMessage.content.length - 1]; + lastContent.text += message.content; + } else { + const messageContent = { + type: 'text', + text: message.content, + ...(shouldCacheMessage(message, index) && { cache_control: { type: 'ephemeral' } }) + }; + + const messageWithOptionalCache = { + role: message.role, + content: [messageContent] + }; + + lastMessage = messageWithOptionalCache; + mergedMessageHistory.push(messageWithOptionalCache); + } + } + + // Add system message to cache if present + const systemWithCache = system ? [{ + type: 'text', + text: system, + ...(cacheEnabled && { cache_control: { type: 'ephemeral' } }) + }] : undefined; + + return { + messages: mergedMessageHistory, + ...(systemWithCache ? { system: systemWithCache } : {}) + }; } } -