diff --git a/evadb/catalog/services/index_catalog_service.py b/evadb/catalog/services/index_catalog_service.py index 852bcd9f07..b2a907a777 100644 --- a/evadb/catalog/services/index_catalog_service.py +++ b/evadb/catalog/services/index_catalog_service.py @@ -74,7 +74,11 @@ def delete_entry_by_name(self, name: str): index_metadata = index_obj.as_dataclass() # clean up the on disk data if os.path.exists(index_metadata.save_file_path): - os.remove(index_metadata.save_file_path) + if os.path.isfile(index_metadata.save_file_path): + # For service-hosting-based vector database, we should not + # touch their base directory. The only case that needs to + # be taken care of is FAISS index local disk file. + os.remove(index_metadata.save_file_path) index_obj.delete(self.session) except Exception: logger.exception("Delete index failed for name {}".format(name)) diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index d936c879dd..ddd9590033 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -172,7 +172,7 @@ def handle_vector_store_params( elif vector_store_type == VectorStoreType.QDRANT: return {"index_db": str(Path(index_path).parent)} elif vector_store_type == VectorStoreType.CHROMADB: - return {"index_path": index_path} + return {"index_path": str(Path(index_path).parent)} elif vector_store_type == VectorStoreType.PINECONE: return {} else: diff --git a/evadb/third_party/vector_stores/pinecone.py b/evadb/third_party/vector_stores/pinecone.py index c5215ce59a..837c95e579 100644 --- a/evadb/third_party/vector_stores/pinecone.py +++ b/evadb/third_party/vector_stores/pinecone.py @@ -46,7 +46,7 @@ def __init__(self, index_name: str) -> None: assert ( self._api_key - ), "Please set your Pinecone API key in evadb.yml file (third_party, pinecone_api_key) or environment variable (PINECONE_KEY)" + ), "Please set your Pinecone API key in evadb.yml file (third_party, pinecone_api_key) or environment variable (PINECONE_KEY). It can be found at Pinecone Dashboard > API Keys > Value" # Get the environment name. self._environment = ConfigurationManager().get_value( @@ -57,7 +57,7 @@ def __init__(self, index_name: str) -> None: assert ( self._environment - ), "Please set the Pinecone environment key in evadb.yml file (third_party, pinecone_env) or environment variable (PINECONE_ENV)" + ), "Please set the Pinecone environment key in evadb.yml file (third_party, pinecone_env) or environment variable (PINECONE_ENV). It can be found Pinecone Dashboard > API Keys > Environment." if not _pinecone_init_done: # Initialize pinecone. diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index ff4ff192d4..b16278c0cf 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -306,8 +306,10 @@ def test_should_do_vector_index_scan(self): ) # Cleanup - self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite1") - self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite2") + drop_query = "DROP INDEX testFaissIndexScanRewrite1" + execute_query_fetch_all(self.evadb, drop_query) + drop_query = "DROP INDEX testFaissIndexScanRewrite2" + execute_query_fetch_all(self.evadb, drop_query) def test_should_not_do_vector_index_scan_with_desc_order(self): # Execution with index scan. @@ -347,7 +349,8 @@ def test_should_not_do_vector_index_scan_with_desc_order(self): self.assertTrue(np.array_equal(actual_open, base_img + 3)) # Cleanup - self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite") + drop_query = "DROP INDEX testFaissIndexScanRewrite" + execute_query_fetch_all(self.evadb, drop_query) def test_should_not_do_vector_index_scan_with_predicate(self): # Execution with index scan. @@ -370,86 +373,122 @@ def test_should_not_do_vector_index_scan_with_predicate(self): self.assertFalse("FaissIndexScan" in batch.frames[0][0]) # Cleanup - self.evadb.catalog().drop_index_catalog_entry("testFaissIndexScanRewrite") + drop_query = "DROP INDEX testFaissIndexScanRewrite" + execute_query_fetch_all(self.evadb, drop_query) + + def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_faiss(self): + for _ in range(2): + create_index_query = """CREATE INDEX testFaissIndexImageDataset + ON testSimilarityImageDataset (DummyFeatureExtractor(data)) + USING FAISS;""" + execute_query_fetch_all(self.evadb, create_index_query) + + select_query = """SELECT _row_id FROM testSimilarityImageDataset + ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) + LIMIT 1;""".format( + self.img_path + ) + explain_batch = execute_query_fetch_all( + self.evadb, f"EXPLAIN {select_query}" + ) + self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) - def test_end_to_end_index_scan_should_work_correctly_on_image_dataset(self): - create_index_query = """CREATE INDEX testFaissIndexImageDataset - ON testSimilarityImageDataset (DummyFeatureExtractor(data)) - USING FAISS;""" - execute_query_fetch_all(self.evadb, create_index_query) - select_query = """SELECT _row_id FROM testSimilarityImageDataset - ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) - LIMIT 1;""".format( - self.img_path - ) - res_batch = execute_query_fetch_all(self.evadb, select_query) - self.assertEqual(res_batch.frames["testsimilarityimagedataset._row_id"][0], 5) + res_batch = execute_query_fetch_all(self.evadb, select_query) + self.assertEqual( + res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 + ) - # Cleanup - self.evadb.catalog().drop_index_catalog_entry("testFaissIndexImageDataset") + # Cleanup + drop_query = "DROP INDEX testFaissIndexImageDataset" + execute_query_fetch_all(self.evadb, drop_query) @qdrant_skip_marker def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_qdrant(self): - create_index_query = """CREATE INDEX testQdrantIndexImageDataset - ON testSimilarityImageDataset (DummyFeatureExtractor(data)) - USING QDRANT;""" - execute_query_fetch_all(self.evadb, create_index_query) - select_query = """SELECT _row_id FROM testSimilarityImageDataset - ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) - LIMIT 1;""".format( - self.img_path - ) + for _ in range(2): + create_index_query = """CREATE INDEX testQdrantIndexImageDataset + ON testSimilarityImageDataset (DummyFeatureExtractor(data)) + USING QDRANT;""" + execute_query_fetch_all(self.evadb, create_index_query) + + select_query = """SELECT _row_id FROM testSimilarityImageDataset + ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) + LIMIT 1;""".format( + self.img_path + ) + explain_batch = execute_query_fetch_all( + self.evadb, f"EXPLAIN {select_query}" + ) + self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) - """|__ ProjectPlan - |__ VectorIndexScanPlan - |__ SeqScanPlan - |__ StoragePlan""" + """|__ ProjectPlan + |__ VectorIndexScanPlan + |__ SeqScanPlan + |__ StoragePlan""" - res_batch = execute_query_fetch_all(self.evadb, select_query) - self.assertEqual(res_batch.frames["testsimilarityimagedataset._row_id"][0], 5) + res_batch = execute_query_fetch_all(self.evadb, select_query) + self.assertEqual( + res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 + ) - # Cleanup - self.evadb.catalog().drop_index_catalog_entry("testQdrantIndexImageDataset") + # Cleanup + drop_query = "DROP INDEX testQdrantIndexImageDataset" + execute_query_fetch_all(self.evadb, drop_query) @chromadb_skip_marker def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_chromadb( self, ): - create_index_query = """CREATE INDEX testChromaDBIndexImageDataset - ON testSimilarityImageDataset (DummyFeatureExtractor(data)) - USING CHROMADB;""" - execute_query_fetch_all(self.evadb, create_index_query) + for _ in range(2): + create_index_query = """CREATE INDEX testChromaDBIndexImageDataset + ON testSimilarityImageDataset (DummyFeatureExtractor(data)) + USING CHROMADB;""" + execute_query_fetch_all(self.evadb, create_index_query) - select_query = """SELECT _row_id FROM testSimilarityImageDataset - ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) - LIMIT 1;""".format( - self.img_path - ) + select_query = """SELECT _row_id FROM testSimilarityImageDataset + ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) + LIMIT 1;""".format( + self.img_path + ) + explain_batch = execute_query_fetch_all( + self.evadb, f"EXPLAIN {select_query}" + ) + self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) + + res_batch = execute_query_fetch_all(self.evadb, select_query) + self.assertEqual( + res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 + ) - res_batch = execute_query_fetch_all(self.evadb, select_query) - self.assertEqual(res_batch.frames["testsimilarityimagedataset._row_id"][0], 5) + # Cleanup + drop_query = "DROP INDEX testChromaDBIndexImageDataset" + execute_query_fetch_all(self.evadb, drop_query) @pinecone_skip_marker def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_pinecone( self, ): - # We need to always drop the index as Pinecone's free tier only supports a single current index. - drop_index_query = "DROP INDEX IF EXISTS testpineconeindeximagedataset;" - execute_query_fetch_all(self.evadb, drop_index_query) - create_index_query = """CREATE INDEX testpineconeindeximagedataset - ON testSimilarityImageDataset (DummyFeatureExtractor(data)) - USING PINECONE;""" - execute_query_fetch_all(self.evadb, create_index_query) - # Sleep to ensure the pinecone records get updated as Pinecone is eventually consistent. - time.sleep(20) + for _ in range(2): + create_index_query = """CREATE INDEX testpineconeindeximagedataset + ON testSimilarityImageDataset (DummyFeatureExtractor(data)) + USING PINECONE;""" + execute_query_fetch_all(self.evadb, create_index_query) + # Sleep to ensure the pinecone records get updated as Pinecone is eventually consistent. + time.sleep(20) + + select_query = """SELECT _row_id FROM testSimilarityImageDataset + ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) + LIMIT 1;""".format( + self.img_path + ) + explain_batch = execute_query_fetch_all( + self.evadb, f"EXPLAIN {select_query}" + ) + self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) - select_query = """SELECT _row_id FROM testSimilarityImageDataset - ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) - LIMIT 1;""".format( - self.img_path - ) + res_batch = execute_query_fetch_all(self.evadb, select_query) + self.assertEqual( + res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 + ) - res_batch = execute_query_fetch_all(self.evadb, select_query) - self.assertEqual(res_batch.frames["testsimilarityimagedataset._row_id"][0], 5) - drop_index_query = "DROP INDEX testpineconeindeximagedataset;" - execute_query_fetch_all(self.evadb, drop_index_query) + drop_index_query = "DROP INDEX testpineconeindeximagedataset;" + execute_query_fetch_all(self.evadb, drop_index_query)