From 53a5ad58e4244496e3cf6eec2d47ef8b12810953 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 20 Oct 2023 12:13:13 +0000 Subject: [PATCH] [Remote State] fix lock release before deletion is completed (#10611) * fix lock release before deletion is completed Signed-off-by: bansvaru (cherry picked from commit c400d84f0e884217454ddfcc1503d02e0b280fa9) Signed-off-by: github-actions[bot] --- .../remote/RemoteClusterStateService.java | 8 +++-- .../RemoteClusterStateServiceTests.java | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index b9d06c8fbb1c1..96ce2fc779ea0 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -1072,7 +1072,8 @@ public void onFailure(Exception e) { * @param clusterUUID uuid of cluster state to refer to in remote * @param manifestsToRetain no of latest manifest files to keep in remote */ - private void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { + // package private for testing + void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { logger.info("Delete stale cluster metadata task is already in progress."); return; @@ -1109,8 +1110,9 @@ public void onFailure(Exception e) { } } ); - } finally { + } catch (Exception e) { deleteStaleMetadataRunning.set(false); + throw e; } } @@ -1190,7 +1192,7 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List { String clusterName = clusterState.getClusterName().value(); - logger.info("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); Set allClustersUUIDsInRemote; try { allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 49b7f0ff8d1a9..433eac63e9580 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -62,6 +62,9 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import org.mockito.ArgumentCaptor; @@ -73,6 +76,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -1004,6 +1008,36 @@ public void testFileNames() { assertThat(splittedName[3], is("P")); } + public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { + BlobContainer blobContainer = mock(BlobContainer.class); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> { + callCount.incrementAndGet(); + if (latch.await(5000, TimeUnit.SECONDS) == false) { + throw new Exception("Timed out waiting for delete task queuing to complete"); + } + return null; + }).when(blobContainer) + .listBlobsByPrefixInSortedOrder( + any(String.class), + any(int.class), + any(BlobContainer.BlobNameSortOrder.class), + any(ActionListener.class) + ); + + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + + latch.countDown(); + assertBusy(() -> assertEquals(1, callCount.get())); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath);