Skip to content

Commit

Permalink
Merge pull request #146 from chroma-core/indexLoadingStability
Browse files Browse the repository at this point in the history
Make sure the index is loaded before checking dimensionality
  • Loading branch information
jeffchuber authored Feb 20, 2023
2 parents e657fa7 + 2678df8 commit b7e5034
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
9 changes: 6 additions & 3 deletions chromadb/db/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,15 @@ def get_nearest_neighbors(
collection_name=None,
collection_uuid=None,
) -> Tuple[List[List[uuid.UUID]], List[List[float]]]:

# Either the collection name or the collection uuid must be provided
if collection_name == None and collection_uuid == None:
raise TypeError("Arguments collection_name and collection_uuid cannot both be None")

if collection_name is not None:
collection_uuid = self.get_collection_uuid_from_name(collection_name)

self._idx.load_if_not_loaded(collection_uuid)

idx_metadata = self._idx.get_metadata()
# Check query embeddings dimensionality
Expand All @@ -462,9 +468,6 @@ def get_nearest_neighbors(
f"Number of requested results {n_results} cannot be greater than number of elements in index {idx_metadata['elements']}"
)

if collection_name is not None:
collection_uuid = self.get_collection_uuid_from_name(collection_name)

if len(where) != 0 or len(where_document) != 0:
results = self.get(
collection_uuid=collection_uuid, where=where, where_document=where_document
Expand Down
4 changes: 4 additions & 0 deletions chromadb/db/index/hnswlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def _save(self):

logger.debug("Index saved to {self._save_folder}/index.bin")

def load_if_not_loaded(self, collection_uuid):
    """Make sure the index for ``collection_uuid`` is the active one.

    Nothing happens when ``self._collection_uuid`` already equals
    ``collection_uuid``; otherwise the work is handed to ``self._load``.
    """
    if self._collection_uuid == collection_uuid:
        # The requested collection is already the current one.
        return
    self._load(collection_uuid)

def _load(self, collection_uuid):
# if we are calling load, we clearly need a different index than the one we have
self._index = None
Expand Down
29 changes: 29 additions & 0 deletions chromadb/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ def local_persist_api():
)
)

# https://docs.pytest.org/en/6.2.x/fixture.html#fixtures-can-be-requested-more-than-once-per-test-return-values-are-cached
@pytest.fixture
def local_persist_api_cache_bust():
    """Build a local duckdb+parquet client under a distinct fixture name.

    pytest caches a fixture's return value within a test, so a test that
    needs a second, freshly constructed client has to request it through
    a different fixture.
    """
    # NOTE(review): presumably the same directory local_persist_api uses,
    # so persisted data is visible to this client — confirm.
    persist_dir = tempfile.gettempdir() + "/test_server"
    settings = Settings(
        chroma_api_impl="local",
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_dir,
    )
    return chromadb.Client(settings)


@pytest.fixture
def fastapi_integration_api():
Expand Down Expand Up @@ -96,6 +107,24 @@ def fastapi_server():
print("Including integration tests only")
test_apis = [fastapi_integration_api]

@pytest.mark.parametrize("api_fixture", [local_persist_api])
def test_persist_index_loading(api_fixture, request):
    """Check that a new client can query a collection persisted by another.

    One client creates a collection, adds a single document and persists.
    A second client, obtained through a different fixture, then fetches
    the collection by name and queries it. Every entry of the query
    result must contain exactly one item.
    """
    # Resolve the fixture from the parametrized value (as test_persist does)
    # instead of hard-coding its name, so the parameter is actually used.
    api = request.getfixturevalue(api_fixture.__name__)
    api.reset()
    collection = api.create_collection("test")
    collection.add(ids="id1", documents="hello")

    api.persist()
    del api

    # A differently named fixture is requested here because pytest caches
    # fixture return values within a single test.
    api2 = request.getfixturevalue("local_persist_api_cache_bust")
    collection = api2.get_collection("test")

    nn = collection.query(query_texts="hello", n_results=1)
    for key in nn:
        assert len(nn[key]) == 1


@pytest.mark.parametrize("api_fixture", [local_persist_api])
def test_persist(api_fixture, request):
api = request.getfixturevalue(api_fixture.__name__)
Expand Down

0 comments on commit b7e5034

Please sign in to comment.