Commit: fix typo

Gabriele Ghisleni committed May 13, 2024
1 parent 2de4eff commit 86cbac0
Showing 5 changed files with 68 additions and 12 deletions.
57 changes: 56 additions & 1 deletion libs/elasticsearch/README.md
@@ -192,4 +192,59 @@ set_llm_cache(
```

When overriding the mapping and the document building,
-please only make additive modifications, keeping the base mapping intact.
+please only make additive modifications, keeping the base mapping intact.



## Embeddings cache usage

Embeddings caching is achieved with [CacheBackedEmbeddings](https://python.langchain.com/docs/modules/data_connection/text_embedding/caching_embeddings),
used in a slightly different way than shown in the official documentation.

```python
from langchain_elasticsearch import ElasticsearchStoreEmbeddings
from elasticsearch import Elasticsearch
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai import OpenAIEmbeddings

es_client = Elasticsearch(hosts="http://localhost:9200")

underlying_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
store = ElasticsearchStoreEmbeddings(
    es_connection=es_client,
    index_name="llm-embeddings-cache",
    namespace=underlying_embeddings.model,
    metadata={"project": "my_llm_project"},
)
cached_embeddings = CacheBackedEmbeddings(
    underlying_embeddings,
    store,
)
```
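
Once built, `cached_embeddings` is a drop-in replacement for the underlying embeddings model. A minimal usage sketch (the sample text is illustrative):

```python
# The first call computes the vector with OpenAI and writes it to the
# "llm-embeddings-cache" index; repeating the call reads it back from
# Elasticsearch instead of re-embedding.
vectors = cached_embeddings.embed_documents(["LangChain is great!"])
vectors_again = cached_embeddings.embed_documents(["LangChain is great!"])
```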

Similarly to the LLM cache, one can subclass `ElasticsearchStoreEmbeddings` in order to index vectors for search.

```python
from typing import Any, Dict, List

from langchain_elasticsearch import ElasticsearchStoreEmbeddings


class SearchableElasticsearchStore(ElasticsearchStoreEmbeddings):
    @property
    def mapping(self) -> Dict[str, Any]:
        mapping = super().mapping
        mapping["mappings"]["properties"]["vector"] = {
            "type": "dense_vector",
            "dims": 1536,
            "index": True,
            "similarity": "dot_product",
        }
        return mapping

    def build_document(self, llm_input: str, vector: List[float]) -> Dict[str, Any]:
        body = super().build_document(llm_input, vector)
        body["vector"] = vector
        return body
```
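
With the `dense_vector` field indexed, the cache index itself becomes searchable. A sketch of an approximate kNN query against it, reusing `es_client` and the index name from the example above (assuming Elasticsearch 8.x syntax):

```python
# Embed the query text, then search the cached document vectors directly.
query_vector = underlying_embeddings.embed_query("some question")
results = es_client.search(
    index="llm-embeddings-cache",
    knn={
        "field": "vector",
        "query_vector": query_vector,
        "k": 5,
        "num_candidates": 50,
    },
)
```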

Be aware that `CacheBackedEmbeddings` does
[not currently support caching queries](https://api.python.langchain.com/en/latest/embeddings/langchain.embeddings.cache.CacheBackedEmbeddings.html#langchain.embeddings.cache.CacheBackedEmbeddings.embed_query);
this means that text queries used for vector searches won't be cached.
However, by overriding the `embed_query` method one should be able to implement it easily.
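
For instance, a minimal sketch of such an override, assuming queries can be cached in the same store used for documents (the class name is hypothetical):

```python
from typing import List

from langchain.embeddings import CacheBackedEmbeddings


class QueryCachingEmbeddings(CacheBackedEmbeddings):
    """Hypothetical variant that also caches query embeddings."""

    def embed_query(self, text: str) -> List[float]:
        # Check the document store for a previously embedded query.
        cached = self.document_embedding_store.mget([text])[0]
        if cached is not None:
            return cached
        # Cache miss: embed with the underlying model and store the result.
        vector = self.underlying_embeddings.embed_query(text)
        self.document_embedding_store.mset([(text, vector)])
        return vector
```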
4 changes: 2 additions & 2 deletions libs/elasticsearch/langchain_elasticsearch/__init__.py
@@ -9,7 +9,7 @@

from langchain_elasticsearch.cache import (
    ElasticsearchCache,
-    ElasticsearchCacheBackedEmbeddings,
+    ElasticsearchStoreEmbeddings,
)
from langchain_elasticsearch.chat_history import ElasticsearchChatMessageHistory
from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings
@@ -24,7 +24,7 @@

__all__ = [
    "ElasticsearchCache",
-    "ElasticsearchCacheBackedEmbeddings",
+    "ElasticsearchStoreEmbeddings",
    "ElasticsearchChatMessageHistory",
    "ElasticsearchEmbeddings",
    "ElasticsearchRetriever",
3 changes: 2 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/_utilities.py
@@ -17,7 +17,7 @@
logger = logging.getLogger(__name__)


-class ElasticsearchIndexer:
+class ElasticsearchCacheIndexer:
    """Mixin for Elasticsearch clients"""

    def __init__(
@@ -54,6 +54,7 @@ def __init__(
                for filtering purposes. This must be JSON serializable in an
                Elasticsearch document. Default to None.
            namespace: Optional namespace to use for the cache. Default to None.
+                Utilized only by CacheBackedEmbeddings.
            es_connection: Optional pre-existing Elasticsearch connection.
            es_url: URL of the Elasticsearch instance to connect to.
            es_cloud_id: Cloud ID of the Elasticsearch instance to connect to.
10 changes: 5 additions & 5 deletions libs/elasticsearch/langchain_elasticsearch/cache.py
@@ -20,15 +20,15 @@
from langchain_core.load import dumps, loads
from langchain_core.stores import BaseStore

-from langchain_elasticsearch._utilities import ElasticsearchIndexer
+from langchain_elasticsearch._utilities import ElasticsearchCacheIndexer

if TYPE_CHECKING:
-    pass
+    from elasticsearch import Elasticsearch

logger = logging.getLogger(__name__)


-class ElasticsearchCache(BaseCache, ElasticsearchIndexer):
+class ElasticsearchCache(BaseCache, ElasticsearchCacheIndexer):
    """An Elasticsearch cache integration for LLMs."""

    @cached_property
@@ -115,8 +115,8 @@ def clear(self, **kwargs: Any) -> None:
        )


-class ElasticsearchCacheBackedEmbeddings(
-    BaseStore[str, List[float]], ElasticsearchIndexer
+class ElasticsearchStoreEmbeddings(
+    BaseStore[str, List[float]], ElasticsearchCacheIndexer
):
    @cached_property
    def mapping(self) -> Dict[str, Any]:
6 changes: 3 additions & 3 deletions libs/elasticsearch/tests/conftest.py
@@ -10,7 +10,7 @@

from langchain_elasticsearch import (
    ElasticsearchCache,
-    ElasticsearchCacheBackedEmbeddings,
+    ElasticsearchStoreEmbeddings,
)


@@ -24,8 +24,8 @@ def es_client_fx() -> Generator[MagicMock, None, None]:
@pytest.fixture
def es_store_fx(
    es_client_fx: MagicMock,
-) -> Generator[ElasticsearchCacheBackedEmbeddings, None, None]:
-    yield ElasticsearchCacheBackedEmbeddings(
+) -> Generator[ElasticsearchStoreEmbeddings, None, None]:
+    yield ElasticsearchStoreEmbeddings(
        es_connection=es_client_fx,
        index_name="test_index",
        store_input=True,
