From 6aaa238b4e5b5b8f28a3178db6f00655a712aa2e Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Tue, 24 Sep 2024 03:19:23 +0800 Subject: [PATCH] Fix issue caused by chromadb error type change (#3557) * Fix chromadb error type * Update docs * Improve robustness * Fix tests * Fix docs * Fix docs * Fix docs * Fix docs --- autogen/agentchat/contrib/vectordb/chromadb.py | 7 ++++++- notebook/agentchat_RetrieveChat.ipynb | 4 +++- test/agentchat/contrib/vectordb/test_chromadb.py | 11 +++++++++-- website/blog/2023-10-18-RetrieveChat/index.mdx | 4 +++- website/docs/installation/Optional-Dependencies.md | 1 + website/docs/topics/retrieval_augmentation.md | 1 + 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/autogen/agentchat/contrib/vectordb/chromadb.py b/autogen/agentchat/contrib/vectordb/chromadb.py index 1ed8708409d3..bef4a1090219 100644 --- a/autogen/agentchat/contrib/vectordb/chromadb.py +++ b/autogen/agentchat/contrib/vectordb/chromadb.py @@ -14,6 +14,11 @@ except ImportError: raise ImportError("Please install chromadb: `pip install chromadb`") +try: + from chromadb.errors import ChromaError +except ImportError: + ChromaError = Exception + CHROMADB_MAX_BATCH_SIZE = os.environ.get("CHROMADB_MAX_BATCH_SIZE", 40000) logger = get_logger(__name__) @@ -84,7 +89,7 @@ def create_collection( collection = self.active_collection else: collection = self.client.get_collection(collection_name, embedding_function=self.embedding_function) - except ValueError: + except (ValueError, ChromaError): collection = None if collection is None: return self.client.create_collection( diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index eee192c4f826..0b829835a0a3 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -31,6 +31,8 @@ "pip install pyautogen[retrievechat] flaml[automl]\n", "```\n", "\n", + "*You'll need to install `chromadb<=0.5.0` if you see issue like 
[#3551](https://github.com/microsoft/autogen/issues/3551).*\n", + "\n", "For more information, please refer to the [installation guide](/docs/installation/).\n", ":::\n", "````" @@ -2785,7 +2787,7 @@ ] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "autogen312", "language": "python", "name": "python3" }, diff --git a/test/agentchat/contrib/vectordb/test_chromadb.py b/test/agentchat/contrib/vectordb/test_chromadb.py index ee4886f5154d..19b365db10ba 100644 --- a/test/agentchat/contrib/vectordb/test_chromadb.py +++ b/test/agentchat/contrib/vectordb/test_chromadb.py @@ -15,6 +15,11 @@ else: skip = False +try: + from chromadb.errors import ChromaError +except ImportError: + ChromaError = Exception + @pytest.mark.skipif(skip, reason="dependency is not installed") def test_chromadb(): @@ -26,12 +31,14 @@ def test_chromadb(): # test_delete_collection db.delete_collection(collection_name) - pytest.raises(ValueError, db.get_collection, collection_name) + pytest.raises((ValueError, ChromaError), db.get_collection, collection_name) # test more create collection collection = db.create_collection(collection_name, overwrite=False, get_or_create=False) assert collection.name == collection_name - pytest.raises(ValueError, db.create_collection, collection_name, overwrite=False, get_or_create=False) + pytest.raises( + (ValueError, ChromaError), db.create_collection, collection_name, overwrite=False, get_or_create=False + ) collection = db.create_collection(collection_name, overwrite=True, get_or_create=False) assert collection.name == collection_name collection = db.create_collection(collection_name, overwrite=False, get_or_create=True) diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 91b8b5012a3b..d3ad6aff3287 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -4,7 +4,7 @@ authors: thinkall tags: [LLM, RAG] --- -*Last 
update: August 14, 2024; AutoGen version: v0.2.35* +*Last update: September 23, 2024; AutoGen version: v0.2.35* ![RAG Architecture](img/retrievechat-arch.png) @@ -57,6 +57,8 @@ Please install pyautogen with the [retrievechat] option before using RAG agents. pip install "pyautogen[retrievechat]" ``` +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* + RetrieveChat can handle various types of documents. By default, it can process plain text and PDF files, including formats such as 'txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml' and 'pdf'. diff --git a/website/docs/installation/Optional-Dependencies.md b/website/docs/installation/Optional-Dependencies.md index 2d0067c9950e..820b8f18827f 100644 --- a/website/docs/installation/Optional-Dependencies.md +++ b/website/docs/installation/Optional-Dependencies.md @@ -49,6 +49,7 @@ Example notebooks: ```bash pip install "pyautogen[retrievechat]" ``` +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* Alternatively `pyautogen` also supports PGVector and Qdrant which can be installed in place of ChromaDB, or alongside it. diff --git a/website/docs/topics/retrieval_augmentation.md b/website/docs/topics/retrieval_augmentation.md index 3c428f164868..7fdd39f31f92 100644 --- a/website/docs/topics/retrieval_augmentation.md +++ b/website/docs/topics/retrieval_augmentation.md @@ -56,6 +56,7 @@ ragproxyagent.initiate_chat( assistant, message=ragproxyagent.message_generator, problem=code_problem, search_string="spark" ) # search_string is used as an extra filter for the embeddings search, in this case, we only want to search documents that contain "spark".
``` +*You'll need to install `chromadb<=0.5.0` if you see an issue like [#3551](https://github.com/microsoft/autogen/issues/3551).* ## Example Setup: RAG with Retrieval Augmented Agents with PGVector The following is an example setup demonstrating how to create retrieval augmented agents in AutoGen: