run-llama · logan-markewich · Dec 15, 2024 · Dec 15, 2024
diff --git a/docs/docs/examples/cookbooks/contextual_retrieval.ipynb b/docs/docs/examples/cookbooks/contextual_retrieval.ipynb
@@ -225,7 +225,7 @@
     "from llama_index.core.retrievers import BaseRetriever, VectorIndexRetriever\n",
     "from llama_index.core.schema import NodeWithScore\n",
     "from llama_index.core import VectorStoreIndex, QueryBundle\n",
-    "from llama_index.core.llms import ChatMessage\n",
+    "from llama_index.core.llms import ChatMessage, TextBlock\n",
     "\n",
     "import pandas as pd\n",
     "import copy\n",
@@ -244,18 +244,16 @@
     "            ChatMessage(\n",
     "                role=\"user\",\n",
     "                content=[\n",
-    "                    {\n",
-    "                        \"text\": prompt_document.format(\n",
+    "                    TextBlock(\n",
+    "                        text=prompt_document.format(\n",
     "                            WHOLE_DOCUMENT=WHOLE_DOCUMENT\n",
-    "                        ),\n",
-    "                        \"type\": \"text\",\n",
-    "                        \"cache_control\": {\"type\": \"ephemeral\"},\n",
-    "                    },\n",
-    "                    {\n",
-    "                        \"text\": prompt_chunk.format(CHUNK_CONTENT=node.text),\n",
-    "                        \"type\": \"text\",\n",
-    "                    },\n",
+    "                        )\n",
+    "                    ),\n",
+    "                    TextBlock(\n",
+    "                        text=prompt_chunk.format(CHUNK_CONTENT=node.text)\n",
+    "                    ),\n",
     "                ],\n",
+    "                additional_kwargs={\"cache_control\": {\"type\": \"ephemeral\"}},\n",
     "            ),\n",
     "        ]\n",
     "        new_node.metadata[\"context\"] = str(\n",
@@ -1357,9 +1355,9 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "llamacloud",
+   "display_name": "llama-index-caVs7DDe-py3.10",
    "language": "python",
-   "name": "llamacloud"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/docs/docs/examples/llm/anthropic_prompt_caching.ipynb b/docs/docs/examples/llm/anthropic_prompt_caching.ipynb
@@ -51,7 +51,7 @@
     "\n",
     "os.environ[\n",
     "    \"ANTHROPIC_API_KEY\"\n",
-    "] = \"sk-...\"  # replace with your Anthropic API key"
+    "] = \"sk-ant-...\"  # replace with your Anthropic API key"
    ]
   },
   {
@@ -90,16 +90,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2024-09-28 01:22:14--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
-      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8001::154, 2606:50c0:8002::154, ...\n",
-      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.\n",
+      "--2024-12-14 18:39:03--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
+      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...\n",
+      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
       "HTTP request sent, awaiting response... 200 OK\n",
       "Length: 75042 (73K) [text/plain]\n",
       "Saving to: ‘./paul_graham_essay.txt’\n",
       "\n",
-      "./paul_graham_essay 100%[===================>]  73.28K  --.-KB/s    in 0.01s   \n",
+      "./paul_graham_essay 100%[===================>]  73.28K  --.-KB/s    in 0.04s   \n",
       "\n",
-      "2024-09-28 01:22:14 (5.73 MB/s) - ‘./paul_graham_essay.txt’ saved [75042/75042]\n",
+      "2024-12-14 18:39:03 (1.62 MB/s) - ‘./paul_graham_essay.txt’ saved [75042/75042]\n",
       "\n"
      ]
     }
@@ -161,20 +161,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_index.core.llms import ChatMessage\n",
+    "from llama_index.core.llms import ChatMessage, TextBlock\n",
     "\n",
     "messages = [\n",
     "    ChatMessage(role=\"system\", content=\"You are helpful AI Assitant.\"),\n",
     "    ChatMessage(\n",
     "        role=\"user\",\n",
     "        content=[\n",
-    "            {\n",
-    "                \"text\": f\"{document_text}\",\n",
-    "                \"type\": \"text\",\n",
-    "                \"cache_control\": {\"type\": \"ephemeral\"},\n",
-    "            },\n",
-    "            {\"text\": \"Why did Paul Graham start YC?\", \"type\": \"text\"},\n",
+    "            TextBlock(\n",
+    "                text=f\"{document_text}\",\n",
+    "                type=\"text\",\n",
+    "            ),\n",
+    "            TextBlock(\n",
+    "                text=\"\\n\\nWhy did Paul Graham start YC?\",\n",
+    "                type=\"text\",\n",
+    "            ),\n",
     "        ],\n",
+    "        additional_kwargs={\"cache_control\": {\"type\": \"ephemeral\"}},\n",
     "    ),\n",
     "]\n",
     "\n",
@@ -198,14 +201,14 @@
     {
      "data": {
       "text/plain": [
-       "{'id': 'msg_01KCcFZnbAGjxSKJm7LnXajp',\n",
-       " 'content': [TextBlock(text=\"Based on the essay, it seems Paul Graham started Y Combinator for a few key reasons:\\n\\n1. He had been thinking about ways to improve venture capital and startup funding, like making smaller investments in younger, more technical founders.\\n\\n2. He wanted to try angel investing but hadn't gotten around to it yet, despite intending to for years after Yahoo acquired his company Viaweb.\\n\\n3. He missed working with his former Viaweb co-founders Robert Morris and Trevor Blackwell and wanted to find a project they could collaborate on.\\n\\n4. His girlfriend (later wife) Jessica Livingston was looking for a new job after interviewing at a VC firm, and Graham had been telling her ideas for how to improve VC.\\n\\n5. When giving a talk to Harvard students about startups, he realized there was demand for seed funding and advice from experienced founders.\\n\\n6. They wanted to create an investment firm that would actually implement Graham's ideas about how to better fund and support early-stage startups.\\n\\n7. They were somewhat naïve about how to be angel investors, which allowed them to take novel approaches like the batch model of funding multiple startups at once.\\n\\nSo it was a convergence of Graham's ideas about improving startup funding, his desire to angel invest and work with his former co-founders again, and the opportunity presented by Jessica looking for a new job. Their lack of experience in traditional VC allowed them to take an innovative approach.\", type='text')],\n",
+       "{'id': 'msg_01PAaZDTjEqcZksFiiqYH42t',\n",
+       " 'content': [TextBlock(text='Based on the essay, it seems Paul Graham started Y Combinator (YC) for a few key reasons:\\n\\n1. He had experience as a startup founder with Viaweb and wanted to help other founders avoid mistakes he had made.\\n\\n2. He had ideas about how venture capital could be improved, like making more smaller investments in younger technical founders.\\n\\n3. He was looking for something new to work on after selling Viaweb to Yahoo and trying painting for a while.\\n\\n4. He wanted to gain experience as an investor and thought funding a batch of startups at once would be a good way to do that.\\n\\n5. It started as a \"Summer Founders Program\" to give undergrads an alternative to summer internships, but quickly grew into something more serious.\\n\\n6. He saw an opportunity to scale startup funding by investing in batches of companies at once.\\n\\n7. He was excited by the potential to help create new startups and technologies.\\n\\n8. It allowed him to continue working with his friends/former colleagues Robert Morris and Trevor Blackwell.\\n\\n9. He had built an audience through his essays that provided deal flow for potential investments.\\n\\nSo in summary, it was a combination of wanting to help founders, improve venture capital, gain investing experience, work with friends, and leverage his existing audience/expertise in the startup world. The initial idea evolved quickly from a summer program into a new model for seed investing.', type='text')],\n",
        " 'model': 'claude-3-5-sonnet-20240620',\n",
        " 'role': 'assistant',\n",
        " 'stop_reason': 'end_turn',\n",
        " 'stop_sequence': None,\n",
        " 'type': 'message',\n",
-       " 'usage': Usage(input_tokens=12, output_tokens=313, cache_creation_input_tokens=17470, cache_read_input_tokens=0)}"
+       " 'usage': Usage(input_tokens=4, output_tokens=305, cache_creation_input_tokens=9, cache_read_input_tokens=17467)}"
       ]
      },
      "execution_count": null,
@@ -221,7 +224,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As you can see, `17470` tokens have been cached, as indicated by `cache_creation_input_tokens`.\n",
+    "As you can see, since I've run this a few different times, `cache_creation_input_tokens` and `cache_read_input_tokens` are both higher than zero, indicating that the text was cached properly.\n",
     "\n",
     "Now, let’s run another query on the same document. It should retrieve the document text from the cache, which will be reflected in `cache_read_input_tokens`."
    ]
@@ -237,13 +240,16 @@
     "    ChatMessage(\n",
     "        role=\"user\",\n",
     "        content=[\n",
-    "            {\n",
-    "                \"text\": f\"{document_text}\",\n",
-    "                \"type\": \"text\",\n",
-    "                \"cache_control\": {\"type\": \"ephemeral\"},\n",
-    "            },\n",
-    "            {\"text\": \"What did Paul Graham do growing up?\", \"type\": \"text\"},\n",
+    "            TextBlock(\n",
+    "                text=f\"{document_text}\",\n",
+    "                type=\"text\",\n",
+    "            ),\n",
+    "            TextBlock(\n",
+    "                text=\"\\n\\nWhat did Paul Graham do growing up?\",\n",
+    "                type=\"text\",\n",
+    "            ),\n",
     "        ],\n",
+    "        additional_kwargs={\"cache_control\": {\"type\": \"ephemeral\"}},\n",
     "    ),\n",
     "]\n",
     "\n",
@@ -260,14 +266,14 @@
     {
      "data": {
       "text/plain": [
-       "{'id': 'msg_01CpwhtuvJ8UR64xSbpxoutZ',\n",
-       " 'content': [TextBlock(text='Based on the essay, here are some key things Paul Graham did growing up:\\n\\n1. As a teenager, he focused mainly on writing and programming outside of school. He tried writing short stories but says they were \"awful\".\\n\\n2. In 9th grade (age 13-14), he started programming on an IBM 1401 computer at his school district\\'s data processing center. He used an early version of Fortran.\\n\\n3. He convinced his father to buy a TRS-80 microcomputer around 1980 when he was in high school. He wrote simple games, a program to predict model rocket flight, and a word processor his father used.\\n\\n4. He planned to study philosophy in college, thinking it was more powerful than other fields. \\n\\n5. In college, he got interested in artificial intelligence after reading a novel featuring an intelligent computer and seeing a documentary about an AI program called SHRDLU.\\n\\n6. He taught himself Lisp programming language in college since there were no AI classes offered.\\n\\n7. For his undergraduate thesis, he reverse-engineered the SHRDLU AI program.\\n\\n8. He graduated college with a degree in \"Artificial Intelligence\" (in quotes on the diploma).\\n\\n9. He applied to grad schools for AI and ended up going to Harvard for graduate studies.\\n\\nSo in summary, his main interests and activities growing up centered around writing, programming, and eventually artificial intelligence as he entered college and graduate school.', type='text')],\n",
+       "{'id': 'msg_011TQgbpBuBkZAJeatVVcqtp',\n",
+       " 'content': [TextBlock(text='Based on the essay, here are some key things Paul Graham did growing up:\\n\\n1. As a teenager, he focused mainly on writing and programming outside of school. He tried writing short stories but says they were \"awful\".\\n\\n2. At age 13-14, he started programming on an IBM 1401 computer at his school district\\'s data processing center. He used an early version of Fortran.\\n\\n3. In high school, he convinced his father to buy a TRS-80 microcomputer around 1980. He wrote simple games, a program to predict model rocket flight, and a word processor his father used.\\n\\n4. He went to college intending to study philosophy, but found it boring. He then decided to switch to studying artificial intelligence (AI).\\n\\n5. In college, he learned Lisp programming language, which expanded his concept of what programming could be. \\n\\n6. For his undergraduate thesis, he reverse-engineered SHRDLU, an early natural language processing program.\\n\\n7. He applied to grad schools for AI and ended up going to Harvard for graduate studies.\\n\\n8. In grad school, he realized AI as practiced then was not going to achieve true intelligence. He pivoted to focusing more on Lisp programming.\\n\\n9. He started writing a book about Lisp hacking while in grad school, which was eventually published in 1993 as \"On Lisp\".\\n\\nSo in summary, his early years were focused on writing, programming (especially Lisp), and studying AI, before he eventually moved on to other pursuits after grad school. The essay provides a detailed account of his intellectual development in these areas.', type='text')],\n",
        " 'model': 'claude-3-5-sonnet-20240620',\n",
        " 'role': 'assistant',\n",
        " 'stop_reason': 'end_turn',\n",
        " 'stop_sequence': None,\n",
        " 'type': 'message',\n",
-       " 'usage': Usage(input_tokens=12, output_tokens=313, cache_creation_input_tokens=0, cache_read_input_tokens=17470)}"
+       " 'usage': Usage(input_tokens=4, output_tokens=356, cache_creation_input_tokens=0, cache_read_input_tokens=17476)}"
       ]
      },
      "execution_count": null,
@@ -289,9 +295,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "llamacloud",
+   "display_name": "llama-index-caVs7DDe-py3.10",
    "language": "python",
-   "name": "llamacloud"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py
@@ -4,7 +4,13 @@
 
 from typing import Dict, Sequence, Tuple
 
-from llama_index.core.base.llms.types import ChatMessage, ChatResponse, MessageRole
+from llama_index.core.base.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ImageBlock,
+    MessageRole,
+    TextBlock,
+)
 
 from anthropic.types import MessageParam, TextBlockParam, ImageBlockParam
 from anthropic.types.tool_result_block_param import ToolResultBlockParam
@@ -130,7 +136,10 @@ def messages_to_anthropic_messages(
     system_prompt = ""
     for message in messages:
         if message.role == MessageRole.SYSTEM:
-            system_prompt += message.content + "\n"
+            # For system messages, concatenate all text blocks
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    system_prompt += block.text + "\n"
         elif message.role == MessageRole.FUNCTION or message.role == MessageRole.TOOL:
             content = ToolResultBlockParam(
                 tool_use_id=message.additional_kwargs["tool_call_id"],
@@ -143,39 +152,42 @@ def messages_to_anthropic_messages(
             )
             anthropic_messages.append(anth_message)
         else:
-            content = []
-            if message.content and isinstance(message.content, list):
-                for item in message.content:
-                    if item and isinstance(item, dict) and item.get("type", None):
-                        if item["type"] == "image":
-                            content.append(ImageBlockParam(**item))
-                        elif "cache_control" in item and item["type"] == "text":
-                            content.append(
-                                PromptCachingBetaTextBlockParam(
-                                    text=item["text"],
-                                    type="text",
-                                    cache_control=PromptCachingBetaCacheControlEphemeralParam(
-                                        type="ephemeral"
-                                    ),
-                                )
-                            )
-                        else:
-                            content.append(TextBlockParam(**item))
-                    else:
-                        content.append(TextBlockParam(text=item, type="text"))
-            elif message.content:
-                content_ = (
-                    PromptCachingBetaTextBlockParam(
-                        text=message.content,
-                        type="text",
-                        cache_control=PromptCachingBetaCacheControlEphemeralParam(
-                            type="ephemeral"
-                        ),
+            content: list[TextBlockParam | ImageBlockParam] = []
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    content_ = (
+                        PromptCachingBetaTextBlockParam(
+                            text=block.text,
+                            type="text",
+                            cache_control=PromptCachingBetaCacheControlEphemeralParam(
+                                type="ephemeral"
+                            ),
+                        )
+                        if "cache_control" in message.additional_kwargs
+                        else TextBlockParam(text=block.text, type="text")
+                    )
+                    content.append(content_)
+                elif isinstance(block, ImageBlock):
+                    # FUTURE: Claude does not support URLs, so we need to always convert to base64
+                    img_bytes = block.resolve_image(as_base64=True).read()
+                    img_str = img_bytes.decode("utf-8")
+
+                    content.append(
+                        ImageBlockParam(
+                            type="image",
+                            source={
+                                "type": "base64",
+                                "media_type": block.image_mimetype,
+                                "data": img_str,
+                            }
+                            if block.image_mimetype
+                            else {
+                                "type": "base64",
+                                "media_type": "image/png",
+                                "data": img_str,
+                            },
+                        )
                     )
-                    if "cache_control" in message.additional_kwargs
-                    else TextBlockParam(text=message.content, type="text")
-                )
-                content.append(content_)
 
             tool_calls = message.additional_kwargs.get("tool_calls", [])
             for tool_call in tool_calls:
@@ -194,7 +206,7 @@ def messages_to_anthropic_messages(
 
             anth_message = MessageParam(
                 role=message.role.value,
-                content=content,  # TODO: type detect for multimodal
+                content=content,
             )
             anthropic_messages.append(anth_message)
     return __merge_common_role_msgs(anthropic_messages), system_prompt.strip()

diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml
@@ -27,12 +27,12 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-anthropic"
 readme = "README.md"
-version = "0.5.0"
+version = "0.6.0"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 anthropic = {extras = ["bedrock", "vertex"], version = ">=0.39.0"}
-llama-index-core = "^0.12.0"
+llama-index-core = "^0.12.5"
 
 [tool.poetry.group.dev.dependencies]
 ipython = "8.10.0"