From ea9f888f023d4a86c2d6a1d4df9b5bee083f6907 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Wed, 25 Dec 2024 02:25:27 +0000
Subject: [PATCH] Also cache by model ID

Signed-off-by: DarkLight1337
---
 vllm/multimodal/processing.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index d79b0f1a8bf30..b0307f10aa729 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -676,23 +676,27 @@ def _hash_kwargs(self, **kwargs: object) -> str:
 
     def get(
         self,
+        model_id: str,
         modality: str,
         input_item: object,
         input_kwargs: Mapping[str, object],
     ) -> Optional[Mapping[str, MultiModalFieldItem]]:
         self._maybe_log_cache_stats()
 
-        cache_key = self._hash_kwargs(**{modality: input_item}, **input_kwargs)
+        cache_key = self._hash_kwargs(model_id=model_id,
+                                      **{modality: input_item}, **input_kwargs)
         return self._cache.get(cache_key)
 
     def put(
         self,
+        model_id: str,
         modality: str,
         input_item: object,
         input_kwargs: Mapping[str, object],
         output_kwargs: Mapping[str, MultiModalFieldItem],
     ) -> None:
-        cache_key = self._hash_kwargs(**{modality: input_item}, **input_kwargs)
+        cache_key = self._hash_kwargs(model_id=model_id,
+                                      **{modality: input_item}, **input_kwargs)
         self._cache.put(cache_key, output_kwargs)
 
 
@@ -886,6 +890,7 @@ def _cached_apply_hf_processor(
         caching the results and reusing cached results.
         """
         cache = self.cache
+        model_id = self.ctx.model_config.model
 
         if cache is None or mm_data_items.has_embedding_inputs():
             return self._apply_hf_processor(
@@ -896,7 +901,7 @@
 
         mm_maybe_cached_field_items = {
             modality: [
-                cache.get(modality, item, hf_processor_mm_kwargs)
+                cache.get(model_id, modality, item, hf_processor_mm_kwargs)
                 for item in items
             ]
             for modality, items in mm_data_items.items()
@@ -936,6 +941,7 @@
             )
 
             cache.put(
+                model_id,
                 modality,
                 mm_data_items[modality][idx],
                 hf_processor_mm_kwargs,
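
Note (illustration only, not part of the patch): the change folds the model ID
into the hash that keys the processing cache, so identical multimodal inputs no
longer collide across different models served by the same process. Below is a
minimal standalone sketch of the idea; it uses a simplified repr-based hash as
a stand-in for vLLM's actual _hash_kwargs, and the model IDs are hypothetical
example values.

    import hashlib

    def hash_kwargs(**kwargs: object) -> str:
        # Simplified stand-in for ProcessingCache._hash_kwargs: serialize
        # the kwargs deterministically, then hash them.
        data = repr(sorted(kwargs.items())).encode("utf-8")
        return hashlib.blake2b(data).hexdigest()

    # Without model_id, two different models hash the same input to one key,
    # so one model could be handed another model's cached processor outputs.
    assert hash_kwargs(image="demo.png") == hash_kwargs(image="demo.png")

    # With model_id mixed in (as get()/put() now do), the keys diverge.
    key_a = hash_kwargs(model_id="org/model-a", image="demo.png")
    key_b = hash_kwargs(model_id="org/model-b", image="demo.png")
    assert key_a != key_b

Reading model_id from self.ctx.model_config.model at the call site keeps the
cache itself model-agnostic, rather than requiring a separate cache instance
per model.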