integration for caching embeddings #27

Merged · 26 commits · May 30, 2024
65 changes: 58 additions & 7 deletions libs/elasticsearch/README.md
@@ -119,15 +119,13 @@ A caching layer for LLMs that uses Elasticsearch.
Simple example:

```python
from langchain.globals import set_llm_cache

from langchain_elasticsearch import ElasticsearchCache

set_llm_cache(
    ElasticsearchCache(
        es_url="http://localhost:9200",
        index_name="llm-chat-cache",
        metadata={"project": "my_chatgpt_project"},
    )
@@ -153,7 +151,6 @@ The new cache class can also be applied to a pre-existing cache index:
import json
from typing import Any, Dict, List

from langchain.globals import set_llm_cache
from langchain_core.caches import RETURN_VAL_TYPE

@@ -185,11 +182,65 @@ class SearchableElasticsearchCache(ElasticsearchCache):
]


set_llm_cache(
    SearchableElasticsearchCache(
        es_url="http://localhost:9200",
        index_name="llm-chat-cache"
    )
)
```

When overriding the mapping and the document building,
please only make additive modifications, keeping the base mapping intact.
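
For a concrete sense of what "additive" means here, a minimal sketch (illustrative, not the collapsed class from this diff; it assumes the base class exposes a `mapping` property and a `build_document(prompt, llm_string, return_val)` method as in the examples in this PR, and the `timestamp` field is invented for the example):

```python
from datetime import datetime, timezone
from typing import Any, Dict

from langchain_core.caches import RETURN_VAL_TYPE

from langchain_elasticsearch import ElasticsearchCache


class TimestampedElasticsearchCache(ElasticsearchCache):
    @property
    def mapping(self) -> Dict[str, Any]:
        # Only add to the base mapping; never remove or change existing fields.
        mapping = super().mapping
        mapping["mappings"]["properties"]["timestamp"] = {"type": "date"}
        return mapping

    def build_document(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> Dict[str, Any]:
        # Keep every field the base class writes and append one extra field.
        body = super().build_document(prompt, llm_string, return_val)
        body["timestamp"] = datetime.now(timezone.utc).isoformat()
        return body
```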

### ElasticsearchEmbeddingsCache

Store and temporarily cache embeddings.

Embeddings caching is provided by [CacheBackedEmbeddings](https://python.langchain.com/docs/modules/data_connection/text_embedding/caching_embeddings), which can be instantiated with the `CacheBackedEmbeddings.from_bytes_store` method.

```python
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai import OpenAIEmbeddings

from langchain_elasticsearch import ElasticsearchEmbeddingsCache

underlying_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

store = ElasticsearchEmbeddingsCache(
    es_url="http://localhost:9200",
    index_name="llm-chat-cache",
    metadata={"project": "my_chatgpt_project"},
    namespace="my_chatgpt_project",
)

embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=underlying_embeddings,
    document_embedding_cache=store,
    query_embedding_cache=store,
)
```
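
A quick usage sketch (illustrative, not part of this diff): the first `embed_documents` call computes the vectors with the underlying model and writes them to the index; repeated calls for the same texts are then served from Elasticsearch.

```python
# First call: vectors are computed by OpenAI and stored in Elasticsearch.
vectors = embeddings.embed_documents(["hello world", "goodbye world"])

# Second call with the same inputs: vectors are read back from the cache.
cached_vectors = embeddings.embed_documents(["hello world", "goodbye world"])
assert cached_vectors == vectors
```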

As with the chat cache, you can subclass `ElasticsearchEmbeddingsCache` to index vectors for search.

```python
from typing import Any, Dict, List

from langchain_elasticsearch import ElasticsearchEmbeddingsCache


class SearchableElasticsearchStore(ElasticsearchEmbeddingsCache):
    @property
    def mapping(self) -> Dict[str, Any]:
        # Additive change only: extend the base mapping with a dense_vector field.
        mapping = super().mapping
        mapping["mappings"]["properties"]["vector"] = {
            "type": "dense_vector",
            "dims": 1536,  # dimensionality of text-embedding-3-small vectors
            "index": True,
            "similarity": "dot_product",
        }
        return mapping

    def build_document(self, llm_input: str, vector: List[float]) -> Dict[str, Any]:
        # Store the raw vector alongside the fields the base class writes.
        body = super().build_document(llm_input, vector)
        body["vector"] = vector
        return body
```
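
With the vectors indexed this way, they can be queried directly, for example with a kNN search through the standard Elasticsearch client. This is a sketch assuming the Elasticsearch 8.x kNN search API, the index and field names used above, and the `underlying_embeddings` object from the earlier snippet; the other document fields depend on the base class's mapping.

```python
from elasticsearch import Elasticsearch

es_client = Elasticsearch(hosts="http://localhost:9200")

# Embed the query with the same model that produced the cached vectors.
query_vector = underlying_embeddings.embed_query("hello world")

response = es_client.search(
    index="llm-chat-cache",
    knn={
        "field": "vector",
        "query_vector": query_vector,
        "k": 5,
        "num_candidates": 50,
    },
)
for hit in response["hits"]["hits"]:
    print(hit["_score"])  # available _source fields depend on the base mapping
```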
6 changes: 5 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/__init__.py
@@ -7,7 +7,10 @@
    SparseVectorStrategy,
)

from langchain_elasticsearch.cache import (
    ElasticsearchCache,
    ElasticsearchEmbeddingsCache,
)
from langchain_elasticsearch.chat_history import ElasticsearchChatMessageHistory
from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings
from langchain_elasticsearch.retrievers import ElasticsearchRetriever
@@ -23,6 +26,7 @@
"ElasticsearchCache",
"ElasticsearchChatMessageHistory",
"ElasticsearchEmbeddings",
"ElasticsearchEmbeddingsCache",
"ElasticsearchRetriever",
"ElasticsearchStore",
# retrieval strategies
13 changes: 8 additions & 5 deletions libs/elasticsearch/langchain_elasticsearch/_utilities.py
@@ -1,8 +1,11 @@
import logging
from enum import Enum

from elasticsearch import Elasticsearch, exceptions
from langchain_core import __version__ as langchain_version

logger = logging.getLogger(__name__)


class DistanceStrategy(str, Enum):
"""Enumerator of the Distance strategies for calculating distances
@@ -29,15 +32,15 @@ def model_must_be_deployed(client: Elasticsearch, model_id: str) -> None:
    try:
        dummy = {"x": "y"}
        client.ml.infer_trained_model(model_id=model_id, docs=[dummy])
    except exceptions.NotFoundError as err:
        raise err
    except exceptions.ConflictError as err:
        raise exceptions.NotFoundError(
            f"model '{model_id}' not found, please deploy it first",
            meta=err.meta,
            body=err.body,
        ) from err
    except exceptions.BadRequestError:
        # This error is expected because we do not know the expected document
        # shape and just use a dummy doc above.
        pass
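
For context, a sketch of how the refactored helper surfaces a missing model to callers (illustrative only; `model_must_be_deployed` lives in a private module and the model id here is just an example):

```python
from elasticsearch import Elasticsearch, exceptions

from langchain_elasticsearch._utilities import model_must_be_deployed

client = Elasticsearch(hosts="http://localhost:9200")
try:
    model_must_be_deployed(client, "my-model-id")
except exceptions.NotFoundError as err:
    # Raised both when the model is missing and when it is not yet deployed.
    print(f"deploy the model first: {err}")
```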