Commit: fix typo

Gabriele Ghisleni committed May 13, 2024
1 parent 2de4eff commit 86cbac0
Showing 5 changed files with 68 additions and 12 deletions.
57 changes: 56 additions & 1 deletion libs/elasticsearch/README.md
@@ -192,4 +192,59 @@ set_llm_cache(
```

When overriding the mapping and the document building,
-please only make additive modifications, keeping the base mapping intact.
+please only make additive modifications, keeping the base mapping intact.



## Embeddings cache usage

Embeddings caching is achieved with [CacheBackedEmbeddings](https://python.langchain.com/docs/modules/data_connection/text_embedding/caching_embeddings),
used in a slightly different way than shown in the official documentation.

```python
from langchain_elasticsearch import ElasticsearchStoreEmbeddings
from elasticsearch import Elasticsearch
from langchain.embeddings import CacheBackedEmbeddings
from langchain_openai import OpenAIEmbeddings

es_client = Elasticsearch(hosts="http://localhost:9200")

underlying_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
store = ElasticsearchStoreEmbeddings(
    es_connection=es_client,
    index_name="llm-embeddings-cache",
    namespace=underlying_embeddings.model,
    metadata={"project": "my_llm_project"},
)
cached_embeddings = CacheBackedEmbeddings(
    underlying_embeddings,
    store,
)
```
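
Once built, `cached_embeddings` is a drop-in replacement for the underlying embeddings model. A minimal usage sketch (the sample text is illustrative):

```python
# The first call computes the vector with OpenAI and writes it to the
# "llm-embeddings-cache" index; repeating the call reads it back from
# Elasticsearch instead of re-embedding.
vectors = cached_embeddings.embed_documents(["LangChain is great!"])
vectors_again = cached_embeddings.embed_documents(["LangChain is great!"])
```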

Similarly to the LLM cache, one can subclass `ElasticsearchStoreEmbeddings` in order to index vectors for search.

```python
from typing import Any, Dict, List

from langchain_elasticsearch import ElasticsearchStoreEmbeddings


class SearchableElasticsearchStore(ElasticsearchStoreEmbeddings):
    @property
    def mapping(self) -> Dict[str, Any]:
        mapping = super().mapping
        mapping["mappings"]["properties"]["vector"] = {
            "type": "dense_vector",
            "dims": 1536,
            "index": True,
            "similarity": "dot_product",
        }
        return mapping

    def build_document(self, llm_input: str, vector: List[float]) -> Dict[str, Any]:
        body = super().build_document(llm_input, vector)
        body["vector"] = vector
        return body
```
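
With the `dense_vector` field indexed, the cache index itself becomes searchable. A sketch of an approximate kNN query against it, reusing `es_client` and the index name from the example above (assuming Elasticsearch 8.x syntax):

```python
# Embed the query text, then search the cached document vectors directly.
query_vector = underlying_embeddings.embed_query("some question")
results = es_client.search(
    index="llm-embeddings-cache",
    knn={
        "field": "vector",
        "query_vector": query_vector,
        "k": 5,
        "num_candidates": 50,
    },
)
```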

Be aware that `CacheBackedEmbeddings` does
[not currently support caching queries](https://api.python.langchain.com/en/latest/embeddings/langchain.embeddings.cache.CacheBackedEmbeddings.html#langchain.embeddings.cache.CacheBackedEmbeddings.embed_query);
this means that text queries used for vector searches won't be cached.
However, by overriding the `embed_query` method one should be able to implement it easily.
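
For instance, a minimal sketch of such an override, assuming queries can be cached in the same store used for documents (the class name is hypothetical):

```python
from typing import List

from langchain.embeddings import CacheBackedEmbeddings


class QueryCachingEmbeddings(CacheBackedEmbeddings):
    """Hypothetical variant that also caches query embeddings."""

    def embed_query(self, text: str) -> List[float]:
        # Check the document store for a previously embedded query.
        cached = self.document_embedding_store.mget([text])[0]
        if cached is not None:
            return cached
        # Cache miss: embed with the underlying model and store the result.
        vector = self.underlying_embeddings.embed_query(text)
        self.document_embedding_store.mset([(text, vector)])
        return vector
```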
4 changes: 2 additions & 2 deletions libs/elasticsearch/langchain_elasticsearch/__init__.py
@@ -9,7 +9,7 @@

from langchain_elasticsearch.cache import (
    ElasticsearchCache,
-    ElasticsearchCacheBackedEmbeddings,
+    ElasticsearchStoreEmbeddings,
)
from langchain_elasticsearch.chat_history import ElasticsearchChatMessageHistory
from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings
@@ -24,7 +24,7 @@

__all__ = [
    "ElasticsearchCache",
-    "ElasticsearchCacheBackedEmbeddings",
+    "ElasticsearchStoreEmbeddings",
    "ElasticsearchChatMessageHistory",
    "ElasticsearchEmbeddings",
    "ElasticsearchRetriever",
3 changes: 2 additions & 1 deletion libs/elasticsearch/langchain_elasticsearch/_utilities.py
@@ -17,7 +17,7 @@
logger = logging.getLogger(__name__)


-class ElasticsearchIndexer:
+class ElasticsearchCacheIndexer:
    """Mixin for Elasticsearch clients"""

    def __init__(
@@ -54,6 +54,7 @@ def __init__(
                for filtering purposes. This must be JSON serializable in an
                Elasticsearch document. Default to None.
            namespace: Optional namespace to use for the cache. Default to None.
+                Utilized only by CacheBackedEmbeddings.
            es_connection: Optional pre-existing Elasticsearch connection.
            es_url: URL of the Elasticsearch instance to connect to.
            es_cloud_id: Cloud ID of the Elasticsearch instance to connect to.
10 changes: 5 additions & 5 deletions libs/elasticsearch/langchain_elasticsearch/cache.py
@@ -20,15 +20,15 @@
from langchain_core.load import dumps, loads
from langchain_core.stores import BaseStore

-from langchain_elasticsearch._utilities import ElasticsearchIndexer
+from langchain_elasticsearch._utilities import ElasticsearchCacheIndexer

if TYPE_CHECKING:
-    pass
+    from elasticsearch import Elasticsearch

logger = logging.getLogger(__name__)


-class ElasticsearchCache(BaseCache, ElasticsearchIndexer):
+class ElasticsearchCache(BaseCache, ElasticsearchCacheIndexer):
    """An Elasticsearch cache integration for LLMs."""

    @cached_property
@@ -115,8 +115,8 @@ def clear(self, **kwargs: Any) -> None:
        )


-class ElasticsearchCacheBackedEmbeddings(
-    BaseStore[str, List[float]], ElasticsearchIndexer
+class ElasticsearchStoreEmbeddings(
+    BaseStore[str, List[float]], ElasticsearchCacheIndexer
):
    @cached_property
    def mapping(self) -> Dict[str, Any]:
6 changes: 3 additions & 3 deletions libs/elasticsearch/tests/conftest.py
@@ -10,7 +10,7 @@

from langchain_elasticsearch import (
    ElasticsearchCache,
-    ElasticsearchCacheBackedEmbeddings,
+    ElasticsearchStoreEmbeddings,
)


@@ -24,8 +24,8 @@ def es_client_fx() -> Generator[MagicMock, None, None]:
@pytest.fixture
def es_store_fx(
    es_client_fx: MagicMock,
-) -> Generator[ElasticsearchCacheBackedEmbeddings, None, None]:
-    yield ElasticsearchCacheBackedEmbeddings(
+) -> Generator[ElasticsearchStoreEmbeddings, None, None]:
+    yield ElasticsearchStoreEmbeddings(
        es_connection=es_client_fx,
        index_name="test_index",
        store_input=True,
