From 8682fd15a23ed4bd8f3921e6d0de3512c44bd8d5 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 23 Dec 2024 14:39:47 +0900 Subject: [PATCH] optimize read performance for accumulated delete markers on the same row or cell --- .../hbase/regionserver/StoreScanner.java | 11 +- .../NormalUserScanQueryMatcher.java | 55 +++- .../querymatcher/ScanQueryMatcher.java | 22 +- ...tStoreScannerDeleteMarkerOptimization.java | 257 ++++++++++++++++++ ...nQueryMatcherDeleteMarkerOptimization.java | 257 ++++++++++++++++++ 5 files changed, 596 insertions(+), 6 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 016d503f5eab..ddc1062582d0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -636,10 +636,11 @@ public boolean next(List outResult, ScannerContext scanner scannerContext.incrementBlockProgress(blockSize); }); - prevCell = cell; scannerContext.setLastPeekedCell(cell); topChanged = false; - ScanQueryMatcher.MatchCode qcode = matcher.match(cell); + ScanQueryMatcher.MatchCode qcode = matcher.match(cell, prevCell); + LOG.trace("next - cell={}, prevCell={}, qCode={}", cell, prevCell, qcode); + prevCell = cell; switch (qcode) { case INCLUDE: case INCLUDE_AND_SEEK_NEXT_ROW: @@ -757,6 +758,12 @@ public boolean next(List outResult, ScannerContext scanner if (stateAfterSeekNextColumn != null) { return scannerContext.setScannerState(stateAfterSeekNextColumn).hasMoreValues(); } + // for skipping delete markers + if ( + CellUtil.isDelete(cell) && this.heap.peek() != null && this.heap.peek().equals(cell) + ) { + this.heap.next(); + } break; case SKIP: diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java index 9ad3c792345e..bf2c5b1ac7dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver.querymatcher; import java.io.IOException; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.ExtendedCell; import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -40,12 +41,15 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher { /** whether time range queries can see rows "behind" a delete */ protected final boolean seePastDeleteMarkers; + private final int scanMaxVersions; + protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns, boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) { super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now); this.deletes = deletes; this.get = scan.isGetScan(); this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE; + this.scanMaxVersions = scan.getMaxVersions(); } @Override @@ -56,11 +60,16 @@ public void beforeShipped() throws IOException { @Override public MatchCode match(ExtendedCell cell) throws IOException { + return match(cell, null); + } + + @Override + public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException { if (filter != null && filter.filterAllRemaining()) { return MatchCode.DONE_SCAN; } - MatchCode returnCode = preCheck(cell); - if (returnCode != null) { + MatchCode returnCode; + if ((returnCode = preCheck(cell)) != null) { return returnCode; } long timestamp = cell.getTimestamp(); @@ -71,15 +80,55 @@ public MatchCode match(ExtendedCell cell) throws IOException { if (includeDeleteMarker) { this.deletes.add(cell); } + // optimization for delete markers + if ((returnCode = checkCanSeekNextCol(cell, prevCell)) != null) { + return returnCode; + } return MatchCode.SKIP; } returnCode = checkDeleted(deletes, cell); - if (returnCode != null) { + // optimization when prevCell is Delete or DeleteFamilyVersion + if ((returnCode = checkDeletedEffectively(cell, prevCell)) != null) { + return returnCode; + } + if ((returnCode = checkDeleted(deletes, cell)) != null) { return returnCode; } return matchColumn(cell, timestamp, typeByte); } + private MatchCode checkCanSeekNextCol(ExtendedCell cell, ExtendedCell prevCell) { + // optimization for DeleteFamily and DeleteColumn(only for empty qualifier) + if ( + canOptimizeReadDeleteMarkers() && (PrivateCellUtil.isDeleteFamily(cell) + || PrivateCellUtil.isDeleteColumns(cell) && cell.getQualifierLength() > 0) + ) { + return MatchCode.SEEK_NEXT_COL; + } + // optimization for duplicate Delete and DeleteFamilyVersion + return checkDeletedEffectively(cell, prevCell); + } + + // If prevCell is a delete marker and cell is a Put or delete marker, + // it means the cell is deleted effectively. + // And we can do SEEK_NEXT_COL. + private MatchCode checkDeletedEffectively(ExtendedCell cell, ExtendedCell prevCell) { + if ( + prevCell != null && canOptimizeReadDeleteMarkers() + && CellUtil.matchingRowColumn(prevCell, cell) && CellUtil.matchingTimestamp(prevCell, cell) + && (PrivateCellUtil.isDeleteType(prevCell) && cell.getQualifierLength() > 0 + || PrivateCellUtil.isDeleteFamilyVersion(prevCell)) + ) { + return MatchCode.SEEK_NEXT_COL; + } + return null; + } + + private boolean canOptimizeReadDeleteMarkers() { + // for simplicity, optimization works only for these cases + return !seePastDeleteMarkers && scanMaxVersions == 1; + } + @Override protected void reset() { deletes.reset(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java index dc3259f03d3a..044f7e62854f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java @@ -238,6 +238,25 @@ protected final MatchCode checkDeleted(DeleteTracker deletes, ExtendedCell cell) */ public abstract MatchCode match(ExtendedCell cell) throws IOException; + /** + * Determines if the caller should do one of several things: + * + * @param cell KeyValue to check + * @param prevCell KeyValue checked previously + * @return The match code instance. + * @throws IOException in case there is an internal consistency problem caused by a data + * corruption. + */ + public MatchCode match(ExtendedCell cell, ExtendedCell prevCell) throws IOException { + return match(cell); + } + /** Returns the start key */ public ExtendedCell getStartKey() { return startKey; @@ -284,7 +303,8 @@ public ExtendedCell getKeyForNextColumn(ExtendedCell cell) { // see HBASE-18471 for more details // see TestFromClientSide3#testScanAfterDeletingSpecifiedRow // see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2 - if (cell.getQualifierLength() == 0) { + // But we can seek to next column if the cell is a type of DeleteFamily. + if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) { ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell); if (nextKey != cell) { return nextKey; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java new file mode 100644 index 000000000000..f2c2e43d36a8 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScannerDeleteMarkerOptimization.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.KeyValueTestUtil.create; +import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestStoreScannerDeleteMarkerOptimization { + private static final String CF = "cf"; + private static final byte[] CF_BYTES = Bytes.toBytes(CF); + private static final Configuration CONF = HBaseConfiguration.create(); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestStoreScannerDeleteMarkerOptimization.class); + + private List kvs; + private Pair, Long> result; + + private Pair, Long> scanAll() throws IOException { + return scanAll(KeepDeletedCells.FALSE, 1); + } + + private Pair, Long> scanAll(KeepDeletedCells keepDeletedCells, int maxVersions) + throws IOException { + ScanInfo scanInfo = new ScanInfo(CONF, CF_BYTES, 0, maxVersions, Long.MAX_VALUE, + keepDeletedCells, HConstants.DEFAULT_BLOCKSIZE, 0, CellComparator.getInstance(), false); + List scanners = scanFixture(kvs.toArray(new KeyValue[0])); + Scan scanSpec = new Scan().readVersions(maxVersions); + try (StoreScanner scan = new StoreScanner(scanSpec, scanInfo, null, scanners)) { + List results = new ArrayList<>(); + // noinspection StatementWithEmptyBody + while (scan.next(results)) { + } + return new Pair<>(results, scan.getEstimatedNumberOfKvsScanned()); + } + } + + @BeforeClass + public static void setUpBeforeClass() { + LogManager.getLogger("org.apache.hadoop.hbase.regionserver.StoreScanner").setLevel(Level.TRACE); + } + + @Before + public void setUp() { + kvs = new ArrayList<>(); + } + + @Test + public void testDelete() throws IOException { + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + // kvs0, kvs2, kvs4 + assertEquals(3, result.getSecond().longValue()); + } + + @Test + public void testDuplicatDelete() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + // kvs0, kvs1 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testNotDuplicatDelete() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(2), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteEmptyQualifier() throws IOException { + kvs.add(create("r", CF, "", 2, Type.Delete, "")); + kvs.add(create("r", CF, "", 2, Type.Put, "v")); + kvs.add(create("r", CF, "", 1, Type.Delete, "")); + kvs.add(create("r", CF, "", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteFamilyVersion() throws IOException { + // DeleteFamilyVersion cannot be optimized + kvs.add(create("r", CF, "", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "", 2, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "", 1, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDuplicatDeleteFamilyVersion() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + // kvs0, kvs1 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testNotDuplicatDeleteFamilyVersion() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 2, Type.DeleteFamilyVersion, "")); + kvs.add(create("r", CF, "q", 1, Type.DeleteFamilyVersion, "")); + + result = scanAll(); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testDeleteColumn() throws IOException { + kvs.add(create("r", CF, "q", 3, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 2, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + // kvs0, kvs4 + assertEquals(2, result.getSecond().longValue()); + } + + @Test + public void testDeleteFamily() throws IOException { + kvs.add(create("r", CF, "", 3, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "", 2, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "", 1, Type.DeleteFamily, "")); + kvs.add(create("r", CF, "q", 4, Type.Put, "v")); + kvs.add(create("r", CF, "q", 3, Type.Put, "v")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + result = scanAll(); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(3), result.getFirst().get(0)); + // kvs0, kvs3, kvs4, kvs6 + assertEquals(4, result.getSecond().longValue()); + } + + @Test + public void testKeepDeletedCells() throws IOException { + kvs.add(create("r", CF, "q", 2, Type.Delete, "")); + kvs.add(create("r", CF, "q", 2, Type.Put, "v")); + kvs.add(create("r", CF, "q", 1, Type.Delete, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + kvs.add(create("r", CF, "q1", 1, Type.Put, "v")); + + // optimization works only for KeepDeletedCells.FALSE + result = scanAll(KeepDeletedCells.TRUE, 1); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + + // optimization works only for KeepDeletedCells.FALSE + result = scanAll(KeepDeletedCells.TTL, 1); + assertEquals(1, result.getFirst().size()); + assertEquals(kvs.get(4), result.getFirst().get(0)); + assertEquals(kvs.size(), result.getSecond().longValue()); + } + + @Test + public void testScanMaxVersions() throws IOException { + kvs.add(create("r", CF, "q", 4, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 3, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 2, Type.DeleteColumn, "")); + kvs.add(create("r", CF, "q", 1, Type.Put, "v")); + + // optimization works only for maxVersions = 1 + result = scanAll(KeepDeletedCells.FALSE, 1); + assertEquals(0, result.getFirst().size()); + // kvs0 + assertEquals(1, result.getSecond().longValue()); + + // optimization does not work + result = scanAll(KeepDeletedCells.FALSE, 2); + assertEquals(0, result.getFirst().size()); + assertEquals(kvs.size(), result.getSecond().longValue()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java new file mode 100644 index 000000000000..ac4745cc1ffe --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcherDeleteMarkerOptimization.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.querymatcher; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeepDeletedCells; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.regionserver.ScanInfo; +import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestUserScanQueryMatcherDeleteMarkerOptimization extends AbstractTestScanQueryMatcher { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestUserScanQueryMatcherDeleteMarkerOptimization.class); + + private List> pairs; + + private void verify(List> pairs) throws IOException { + verify(pairs, 1); + } + + private void verify(List> pairs, int maxVersions) throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + scan.readVersions(maxVersions); + UserScanQueryMatcher qm = UserScanQueryMatcher.create(scan, + new ScanInfo(this.conf, fam1, 0, maxVersions, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam1), now - ttl, now, null); + + List storedKVs = new ArrayList<>(); + for (Pair Pair : pairs) { + storedKVs.add(Pair.getFirst()); + } + + List scannedKVs = new ArrayList<>(); + qm.setToNewRow(storedKVs.get(0)); + + ExtendedCell prevCell = null; + + for (KeyValue kv : storedKVs) { + MatchCode matchCode = qm.match(kv, prevCell); + prevCell = kv; + scannedKVs.add(matchCode); + } + + assertEquals(pairs.size(), scannedKVs.size()); + for (int i = 0; i < scannedKVs.size(); i++) { + assertEquals("second, index - " + i, pairs.get(i).getSecond(), scannedKVs.get(i)); + } + } + + @Before + public void setUp() throws Exception { + super.setUp(); + pairs = new ArrayList<>(); + } + + private KeyValue createKV(byte[] col, long timestamp, Type delete) { + return new KeyValue(row1, fam1, col, timestamp, delete, data); + } + + @Test + public void testNotDuplicatDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testEffectiveDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteWithMaxVersions() throws IOException { + // For simplicity, do not optimize when maxVersions > 1 + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.INCLUDE)); + verify(pairs, 2); + } + + @Test + public void testDuplicatDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SKIP)); + // After prevCell is read and it is checked duplicated, the second cell can be SEEK_NEXT_COL + pairs.add(new Pair<>(createKV(col1, 2, Type.Delete), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testChangeColumn() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col2, 2, Type.Put), MatchCode.INCLUDE)); + verify(pairs); + } + + @Test + public void testNotDuplicatDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testEffectiveDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDuplicatDeleteFamilyVersion() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + // After prevCell is read and it is checked duplicated, the second cell can be SEEK_NEXT_COL + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamilyVersionAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testDifferentDeleteFamilyVersion() throws IOException { + // DeleteFamilyVersion cannot be optimized + pairs.add(new Pair<>(createKV(null, 3, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 3, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SKIP)); + verify(pairs); + } + + @Test + public void testDeleteColumn() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.DeleteColumn), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteColumnAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.DeleteColumn), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamily() throws IOException { + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SEEK_NEXT_COL)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testDeleteFamilyAfterPut() throws IOException { + pairs.add(new Pair<>(createKV(null, 3, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamily), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testEmptyQualifierDeleteShouldNotSeekNextColumn() throws IOException { + // The empty qualifier is used for DeleteFamily and DeleteFamilyVersion markers only. + // So we should not seek to next column for any other type. + pairs.add(new Pair<>(createKV(null, 4, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 4, Type.Delete), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 3, Type.DeleteColumn), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 3, Type.DeleteColumn), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SKIP)); + pairs.add(new Pair<>(createKV(null, 2, Type.DeleteFamilyVersion), MatchCode.SEEK_NEXT_COL)); + pairs.add(new Pair<>(createKV(null, 1, Type.DeleteFamily), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } + + @Test + public void testKeyForNextColumnForDeleteFamily() throws IOException { + long now = EnvironmentEdgeManager.currentTime(); + UserScanQueryMatcher qm = UserScanQueryMatcher.create( + scan, new ScanInfo(this.conf, fam1, 0, 1, ttl, KeepDeletedCells.FALSE, + HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), + get.getFamilyMap().get(fam1), now - ttl, now, null); + + KeyValue kv; + ExtendedCell nextColumnKey; + + // empty qualifier should not return the instance of LastOnRowColCell except for DeleteFamily + kv = createKV(null, 2, Type.Put); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertNotEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + + // empty qualifier should not return the instance of LastOnRowColCell except for DeleteFamily + kv = createKV(null, 2, Type.Delete); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertNotEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + + // empty qualifier should return the instance of LastOnRowColCell only for DeleteFamily + kv = createKV(null, 2, Type.DeleteFamily); + nextColumnKey = qm.getKeyForNextColumn(kv); + assertEquals("LastOnRowColCell", nextColumnKey.getClass().getSimpleName()); + } + + @Test + public void testNoDelete() throws IOException { + pairs.add(new Pair<>(createKV(col1, 2, Type.Put), MatchCode.INCLUDE)); + pairs.add(new Pair<>(createKV(col1, 1, Type.Put), MatchCode.SEEK_NEXT_COL)); + verify(pairs); + } +}