Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Notebook Documentation prompt completions fixes #16397

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,33 @@
# ChangeLog

## [2024-09-04]

### `llama-index-core` [0.11.5]

- remove unneeded assert in property graph retriever (#15832)
- make simple property graphs serialize again (#15833)
- fix json schema for fastapi return types on core components (#15816)

### `llama-index-llms-nvidia` [0.2.2]

- NVIDIA llm: Add Completion for starcoder models (#15802)

### `llama-index-llms-ollama` [0.3.1]

- add ollama response usage (#15773)

### `llama-index-readers-dashscope` [0.2.1]

- fix pydantic v2 validation errors (#15800)

### `llama-index-readers-discord` [0.2.1]

- fix: convert Document id from int to string in DiscordReader (#15806)

### `llama-index-vector-stores-mariadb` [0.1.0]

- Add MariaDB vector store integration package (#15564)

## [2024-09-02]

### `llama-index-core` [0.11.4]
Expand Down
28 changes: 28 additions & 0 deletions docs/docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,33 @@
# ChangeLog

## [2024-09-04]

### `llama-index-core` [0.11.5]

- remove unneeded assert in property graph retriever (#15832)
- make simple property graphs serialize again (#15833)
- fix json schema for fastapi return types on core components (#15816)

### `llama-index-llms-nvidia` [0.2.2]

- NVIDIA llm: Add Completion for starcoder models (#15802)

### `llama-index-llms-ollama` [0.3.1]

- add ollama response usage (#15773)

### `llama-index-readers-dashscope` [0.2.1]

- fix pydantic v2 validation errors (#15800)

### `llama-index-readers-discord` [0.2.1]

- fix: convert Document id from int to string in DiscordReader (#15806)

### `llama-index-vector-stores-mariadb` [0.1.0]

- Add MariaDB vector store integration package (#15564)

## [2024-09-02]

### `llama-index-core` [0.11.4]
Expand Down
4 changes: 4 additions & 0 deletions docs/docs/api_reference/storage/vector_store/mariadb.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
::: llama_index.vector_stores.mariadb
options:
members:
- MariaDBVectorStore
3 changes: 3 additions & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ nav:
- ./examples/llm/nvidia.ipynb
- ./examples/llm/nvidia_nim.ipynb
- ./examples/llm/nvidia_tensorrt.ipynb
- ./examples/llm/nvidia_text_completion.ipynb
- ./examples/llm/nvidia_triton.ipynb
- ./examples/llm/oci_genai.ipynb
- ./examples/llm/octoai.ipynb
Expand Down Expand Up @@ -1470,6 +1471,7 @@ nav:
- ./api_reference/storage/vector_store/lancedb.md
- ./api_reference/storage/vector_store/lantern.md
- ./api_reference/storage/vector_store/lindorm.md
- ./api_reference/storage/vector_store/mariadb.md
- ./api_reference/storage/vector_store/metal.md
- ./api_reference/storage/vector_store/milvus.md
- ./api_reference/storage/vector_store/mongodb.md
Expand Down Expand Up @@ -2151,6 +2153,7 @@ plugins:
- ../llama-index-integrations/embeddings/llama-index-embeddings-xinference
- ../llama-index-integrations/postprocessor/llama-index-postprocessor-xinference-rerank
- ../llama-index-integrations/selectors/llama-index-selectors-notdiamond
- ../llama-index-integrations/vector_stores/llama-index-vector-stores-mariadb
- redirects:
redirect_maps:
./api/llama_index.vector_stores.MongoDBAtlasVectorSearch.html: api_reference/storage/vector_store/mongodb.md
Expand Down
2 changes: 1 addition & 1 deletion llama-index-core/llama_index/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Init file of LlamaIndex."""

__version__ = "0.11.4"
__version__ = "0.11.5"

import logging
from logging import NullHandler
Expand Down
2 changes: 1 addition & 1 deletion llama-index-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ name = "llama-index-core"
packages = [{include = "llama_index"}]
readme = "README.md"
repository = "https://github.com/run-llama/llama_index"
version = "0.11.4"
version = "0.11.5"

[tool.poetry.dependencies]
SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,14 @@ def __init__(
)
self._aclient._custom_headers = {"User-Agent": "llama-index-embeddings-nvidia"}

if not model:
self.model = model
if self._is_hosted and not self.model:
self.model = DEFAULT_MODEL
elif not self._is_hosted and not self.model:
self.__get_default_model()

self._validate_model(self.model) ## validate model

def __get_default_model(self) -> None:
"""Set default model."""
if not self._is_hosted:
Expand Down Expand Up @@ -183,6 +188,29 @@ def _validate_url(self, base_url):
warnings.warn(f"{expected_format} Rest is ignored")
return base_url.strip("/")

def _validate_model(self, model_name: str) -> None:
"""
Validates compatibility of the hosted model with the client.

Args:
model_name (str): The name of the model.

Raises:
ValueError: If the model is incompatible with the client.
"""
if self._is_hosted:
if model_name not in MODEL_ENDPOINT_MAP:
if model_name in [model.id for model in self._client.models.list()]:
warnings.warn(f"Unable to determine validity of {model_name}")
else:
raise ValueError(
f"Model {model_name} is incompatible with client {self.class_name()}. "
f"Please check `{self.class_name()}.available_models()`."
)
else:
if model_name not in [model.id for model in self.available_models]:
raise ValueError(f"No locally hosted {model_name} was found.")

@property
def available_models(self) -> List[Model]:
"""Get available models."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ def test_embedding_class():
def test_nvidia_embedding_param_setting():
emb = NVIDIAEmbedding(
api_key="BOGUS",
model="test-model",
model="NV-Embed-QA",
truncate="END",
timeout=20,
max_retries=10,
embed_batch_size=15,
)

assert emb.model == "test-model"
assert emb.model == "NV-Embed-QA"
assert emb.truncate == "END"
assert emb._client.timeout == 20
assert emb._client.max_retries == 10
Expand Down Expand Up @@ -90,3 +90,28 @@ def test_nvidia_embedding_callback(mock_integration_api):
def test_nvidia_embedding_throws_with_invalid_key(mock_integration_api):
emb = NVIDIAEmbedding(api_key="invalid")
emb.get_text_embedding("hi")


# @pytest.mark.parametrize("model", list(MODEL_ENDPOINT_MAP.keys()))
# def test_model_compatible_client_model(model: str) -> None:
# NVIDIAEmbedding(api_key="BOGUS", model=model)


def test_model_incompatible_client_model() -> None:
    """An unknown model name must raise a descriptive ValueError."""
    bogus_model = "x"
    with pytest.raises(ValueError) as excinfo:
        NVIDIAEmbedding(api_key="BOGUS", model=bogus_model)
    expected = (
        f"Model {bogus_model} is incompatible with client NVIDIAEmbedding. "
        f"Please check `NVIDIAEmbedding.available_models()`."
    )
    assert expected == str(excinfo.value)


def test_model_incompatible_client_known_model() -> None:
    """A model absent from the static endpoint map but advertised by the
    live API should emit exactly one "unable to determine validity" warning."""
    model_name = "google/deplot"
    # F541: was an f-string with no placeholders — plain literal is equivalent.
    warn_msg = "Unable to determine validity"
    with pytest.warns(UserWarning) as msg:
        NVIDIAEmbedding(api_key="BOGUS", model=model_name)
    assert len(msg) == 1
    assert warn_msg in str(msg[0].message)
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from llama_index.llms.nvidia.utils import (
is_nvidia_function_calling_model,
is_chat_model,
ALL_MODELS,
)

from llama_index.llms.openai_like import OpenAILike
Expand Down Expand Up @@ -110,17 +111,20 @@ def __init__(
is_function_calling_model=is_nvidia_function_calling_model(model),
**kwargs,
)
self.model = model
self._is_hosted = base_url in KNOWN_URLS

if self._is_hosted and api_key == "NO_API_KEY_PROVIDED":
warnings.warn(
"An API key is required for the hosted NIM. This will become an error in 0.2.0.",
)

if not model:
self.model = model
if self._is_hosted and not self.model:
self.model = DEFAULT_MODEL
elif not self._is_hosted and not self.model:
self.__get_default_model()

self._validate_model(self.model) ## validate model

def __get_default_model(self):
"""Set default model."""
if not self._is_hosted:
Expand Down Expand Up @@ -163,6 +167,29 @@ def _validate_url(self, base_url):
raise ValueError(f"Invalid base_url, {expected_format}")
return urlunparse((result.scheme, result.netloc, "v1", "", "", ""))

def _validate_model(self, model_name: str) -> None:
"""
Validates compatibility of the hosted model with the client.

Args:
model_name (str): The name of the model.

Raises:
ValueError: If the model is incompatible with the client.
"""
if self._is_hosted:
if model_name not in ALL_MODELS:
if model_name in [model.id for model in self.available_models]:
warnings.warn(f"Unable to determine validity of {model_name}")
else:
raise ValueError(
f"Model {model_name} is incompatible with client {self.class_name()}. "
f"Please check `{self.class_name()}.available_models()`."
)
else:
if model_name not in [model.id for model in self.available_models]:
raise ValueError(f"No locally hosted {model_name} was found.")

@property
def available_models(self) -> List[Model]:
models = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@

# Models that only support the completions (non-chat) API.
COMPLETION_MODELS = ("bigcode/starcoder2-7b", "bigcode/starcoder2-15b")

# Every model name the hosted NVIDIA API catalog is known to serve.
# NOTE(review): "FUNTION" spelling must match the constant's definition
# elsewhere in this module — rename both together if fixing the typo.
ALL_MODELS = (
    tuple(API_CATALOG_MODELS.keys()) + NVIDIA_FUNTION_CALLING_MODELS + COMPLETION_MODELS
)


def is_chat_model(modelname: str) -> bool:
    """Return True when *modelname* supports the chat API (i.e. it is not a completion-only model)."""
    return modelname not in COMPLETION_MODELS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def mock_unknown_urls(httpx_mock: HTTPXMock, base_url: str):
mock_response = {
"data": [
{
"id": "dummy",
"id": "meta/llama3-8b-instruct",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
Expand Down Expand Up @@ -63,16 +63,18 @@ def test_mode_switch_nim_with_url_deprecated():

@pytest.mark.parametrize("base_url", ["https://test_url/v1/"])
def test_mode_switch_param_setting_deprecated(base_url):
instance = Interface(model="dummy")
instance = Interface(model="meta/llama3-8b-instruct")

with pytest.warns(DeprecationWarning):
instance1 = instance.mode("nim", base_url=base_url)
assert instance1.model == "dummy"
assert instance1.model == "meta/llama3-8b-instruct"
assert str(instance1.api_base) == base_url

with pytest.warns(DeprecationWarning):
instance2 = instance1.mode("nvidia", api_key="test", model="dummy-2")
assert instance2.model == "dummy-2"
instance2 = instance1.mode(
"nvidia", api_key="test", model="meta/llama3-15b-instruct"
)
assert instance2.model == "meta/llama3-15b-instruct"
assert str(instance2.api_base) == BASE_URL
assert instance2.api_key == "test"

Expand Down
Loading