From 1db89773e614b8530c8c37abd428fa8f664d85b8 Mon Sep 17 00:00:00 2001 From: Wellington Ramos Chevreuil Date: Wed, 8 Jul 2020 15:17:14 +0100 Subject: [PATCH] =?UTF-8?q?HBASE-21596=20Delete=20for=20a=20specific=20cel?= =?UTF-8?q?l=20version=20can=20bring=20back=20version=E2=80=A6=20(#2009)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Viraj Jasani --- .../hadoop/hbase/regionserver/HRegion.java | 79 +++++++++++++++---- .../hadoop/hbase/TimestampTestBase.java | 10 ++- .../hbase/client/TestFromClientSide.java | 16 ++-- .../hbase/client/TestFromClientSide5.java | 8 +- .../hbase/regionserver/TestKeepDeletes.java | 67 ++++++++++++++++ .../regionserver/TestMinorCompaction.java | 11 ++- 6 files changed, 155 insertions(+), 36 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 40a009c2c7c..1428caf532b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -3151,6 +3151,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi List cells = e.getValue(); assert cells instanceof RandomAccess; + List deleteCells = new ArrayList<>(); Map kvCount = new TreeMap<>(Bytes.BYTES_COMPARATOR); int listSize = cells.size(); for (int i=0; i < listSize; i++) { @@ -3170,37 +3171,87 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi count = kvCount.get(qual); Get get = new Get(CellUtil.cloneRow(cell)); - get.readVersions(count); - get.addColumn(family, qual); + get.readVersions(Integer.MAX_VALUE); if (coprocessorHost != null) { if (!coprocessorHost.prePrepareTimeStampForDeleteVersion(mutation, cell, byteNow, get)) { - updateDeleteLatestVersionTimestamp(cell, get, count, byteNow); + updateDeleteLatestVersionTimestamp(cell, get, count, + this.htableDescriptor.getColumnFamily(family).getMaxVersions(), + byteNow, deleteCells); + } } else { - updateDeleteLatestVersionTimestamp(cell, get, count, byteNow); + updateDeleteLatestVersionTimestamp(cell, get, count, + this.htableDescriptor.getColumnFamily(family).getMaxVersions(), + byteNow, deleteCells); } } else { PrivateCellUtil.updateLatestStamp(cell, byteNow); + deleteCells.add(cell); } } + e.setValue(deleteCells); } } - void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, byte[] byteNow) - throws IOException { - List result = get(get, false); - + private void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, int maxVersions, + byte[] byteNow, List deleteCells) throws IOException { + List result = new ArrayList<>(deleteCells); + Scan scan = new Scan(get); + scan.setRaw(true); + this.getScanner(scan).next(result); + List cells = new ArrayList<>(); if (result.size() < count) { // Nothing to delete PrivateCellUtil.updateLatestStamp(cell, byteNow); - return; + cells.add(cell); + deleteCells.addAll(cells); + } else if (result.size() > count) { + int currentVersion = 0; + long latestCellTS = Long.MAX_VALUE; + result.sort((cell1, cell2) -> { + if(cell1.getTimestamp()>cell2.getTimestamp()){ + return -1; + } else if(cell1.getTimestamp()= maxVersions) { + Cell tempCell = null; + try { + tempCell = PrivateCellUtil.deepClone(cell); + } catch (CloneNotSupportedException e) { + throw new IOException(e); + } + PrivateCellUtil.setTimestamp(tempCell, getCell.getTimestamp()); + cells.add(tempCell); + } else if (currentVersion == 0) { + PrivateCellUtil.setTimestamp(cell, getCell.getTimestamp()); + cells.add(cell); + } + currentVersion++; + } + latestCellTS = getCell.getTimestamp(); + } + + } else { + Cell getCell = result.get(0); + PrivateCellUtil.setTimestamp(cell, getCell.getTimestamp()); + cells.add(cell); } - if (result.size() > count) { - throw new RuntimeException("Unexpected size: " + result.size()); - } - Cell getCell = result.get(count - 1); - PrivateCellUtil.setTimestamp(cell, getCell.getTimestamp()); + deleteCells.addAll(cells); } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TimestampTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TimestampTestBase.java index da732ac3a0e..635c348ea50 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TimestampTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TimestampTestBase.java @@ -70,20 +70,22 @@ public class TimestampTestBase { // If I delete w/o specifying a timestamp, this means I'm deleting the latest. delete(table); // Verify that I get back T2 through T1 -- that the latest version has been deleted. - assertVersions(table, new long [] {T2, T1, T0}); + // Since there were originally 4 puts before the delete, T0 is gone now, + // so after deleting latest, there should be only T2 and T1 + assertVersions(table, new long [] {T2, T1}); // Flush everything out to disk and then retry flusher.flushcache(); - assertVersions(table, new long [] {T2, T1, T0}); + assertVersions(table, new long [] {T2, T1}); // Now add, back a latest so I can test remove other than the latest. put(table); assertVersions(table, new long [] {HConstants.LATEST_TIMESTAMP, T2, T1}); delete(table, T2); - assertVersions(table, new long [] {HConstants.LATEST_TIMESTAMP, T1, T0}); + assertVersions(table, new long [] {HConstants.LATEST_TIMESTAMP, T1}); // Flush everything out to disk and then retry flusher.flushcache(); - assertVersions(table, new long [] {HConstants.LATEST_TIMESTAMP, T1, T0}); + assertVersions(table, new long [] {HConstants.LATEST_TIMESTAMP, T1}); // Now try deleting all from T2 back inclusive (We first need to add T2 // back into the mix and to make things a little interesting, delete and then readd T1. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java index 70d832f57e1..ad0dfdc4901 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java @@ -1741,15 +1741,15 @@ public class TestFromClientSide extends FromClientSideBase { get.addColumn(FAMILIES[0], QUALIFIER); get.readVersions(Integer.MAX_VALUE); result = ht.get(get); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[1], ts[2], ts[3] }, - new byte[][] { VALUES[1], VALUES[2], VALUES[3] }, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[2], ts[3] }, + new byte[][] { VALUES[2], VALUES[3] }, 0, 1); scan = new Scan().withStartRow(ROW); scan.addColumn(FAMILIES[0], QUALIFIER); scan.readVersions(Integer.MAX_VALUE); result = getSingleScanResult(ht, scan); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[1], ts[2], ts[3] }, - new byte[][] { VALUES[1], VALUES[2], VALUES[3] }, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[2], ts[3] }, + new byte[][] { VALUES[2], VALUES[3] }, 0, 1); // Test for HBASE-1847 delete = new Delete(ROW); @@ -1776,8 +1776,8 @@ public class TestFromClientSide extends FromClientSideBase { get.addFamily(FAMILIES[0]); get.readVersions(Integer.MAX_VALUE); result = ht.get(get); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[1], ts[2], ts[3] }, - new byte[][] { VALUES[1], VALUES[2], VALUES[3] }, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[2], ts[3] }, + new byte[][] { VALUES[2], VALUES[3] }, 0, 1); // The Scanner returns the previous values, the expected-naive-unexpected behavior @@ -1785,8 +1785,8 @@ public class TestFromClientSide extends FromClientSideBase { scan.addFamily(FAMILIES[0]); scan.readVersions(Integer.MAX_VALUE); result = getSingleScanResult(ht, scan); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[1], ts[2], ts[3] }, - new byte[][] { VALUES[1], VALUES[2], VALUES[3] }, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[] { ts[2], ts[3] }, + new byte[][] { VALUES[2], VALUES[3] }, 0, 1); // Test deleting an entire family from one row but not the other various ways diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java index 6fb50bd99fe..e039d245f06 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide5.java @@ -1856,8 +1856,8 @@ public class TestFromClientSide5 extends FromClientSideBase { scan.addColumn(FAMILIES[0], QUALIFIER); scan.readVersions(Integer.MAX_VALUE); result = getSingleScanResult(ht, scan); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[]{ts[1], - ts[2], ts[3]}, new byte[][]{VALUES[1], VALUES[2], VALUES[3]}, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[]{ ts[2], ts[3]}, + new byte[][]{ VALUES[2], VALUES[3]}, 0, 1); // Test for HBASE-1847 delete = new Delete(ROW); @@ -1885,8 +1885,8 @@ public class TestFromClientSide5 extends FromClientSideBase { scan.addFamily(FAMILIES[0]); scan.readVersions(Integer.MAX_VALUE); result = getSingleScanResult(ht, scan); - assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[]{ts[1], - ts[2], ts[3]}, new byte[][]{VALUES[1], VALUES[2], VALUES[3]}, 0, 2); + assertNResult(result, ROW, FAMILIES[0], QUALIFIER, new long[]{ ts[2], ts[3]}, + new byte[][]{VALUES[2], VALUES[3]}, 0, 1); // Test deleting an entire family from one row but not the other various // ways diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeepDeletes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeepDeletes.java index b2c9ef28aaf..4c80cb144ee 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeepDeletes.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeepDeletes.java @@ -941,6 +941,73 @@ public class TestKeepDeletes { HBaseTestingUtility.closeRegionAndWAL(region); } + /** + * No more than max versions should be retained. For a CF with max versions 1, + * scans/get should not yield any results after a delete. + * + * @throws IOException if test faces any issues while creating/cleaning up necessary region. + */ + @Test + public void testDeleteConsistentWithOneVersion() throws IOException { + HTableDescriptor htd = hbu.createTableDescriptor(TableName.valueOf(name.getMethodName()), 0, 1, + HConstants.FOREVER, KeepDeletedCells.TRUE); + HRegion region = hbu.createLocalHRegion(htd, null, null); + + long ts = EnvironmentEdgeManager.currentTime(); + Put p = new Put(T1, ts); + p.addColumn(c0, c0, T1); + region.put(p); + + p = new Put(T1, ts+1); + p.addColumn(c0, c0, T2); + region.put(p); + + checkGet(region, T1, c0, c0, ts+2, T2); + + Delete delete = new Delete(T1); + delete.addColumn(c0, c0, HConstants.LATEST_TIMESTAMP); + region.delete(delete); + + Get get = new Get(T1); + Result result = region.get(get); + + assertTrue("Get should not return any results.", result.isEmpty()); + + HBaseTestingUtility.closeRegionAndWAL(region); + } + + /** + * No more than max versions should be retained. For a CF with max versions 2, + * scans/get should yield second version after delete. + * + * @throws IOException if test faces any issues while creating/cleaning up necessary region. + */ + @Test + public void testDeleteConsistentWithTwoVersions() throws Exception { + HTableDescriptor htd = hbu.createTableDescriptor(TableName.valueOf(name.getMethodName()), 0, 2, + HConstants.FOREVER, KeepDeletedCells.TRUE); + HRegion region = hbu.createLocalHRegion(htd, null, null); + + long ts = EnvironmentEdgeManager.currentTime(); + Put p = new Put(T1, ts); + p.addColumn(c0, c0, T1); + region.put(p); + + p = new Put(T1, ts+1); + p.addColumn(c0, c0, T2); + region.put(p); + + checkGet(region, T1, c0, c0, ts+2, T2, T1); + + Delete delete = new Delete(T1); + delete.addColumn(c0, c0, HConstants.LATEST_TIMESTAMP); + region.delete(delete); + + checkGet(region, T1, c0, c0, ts+2, T1); + + HBaseTestingUtility.closeRegionAndWAL(region); + } + private void checkGet(Region region, byte[] row, byte[] fam, byte[] col, long time, byte[]... vals) throws IOException { Get g = new Get(row); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMinorCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMinorCompaction.java index 9f916a5b876..fbfb0959c7d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMinorCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMinorCompaction.java @@ -123,13 +123,12 @@ public class TestMinorCompaction { public void testMinorCompactionWithDeleteColumn2() throws Exception { Delete dc = new Delete(secondRowBytes); dc.addColumn(fam2, col2); - /* compactionThreshold is 3. The table has 4 versions: 0, 1, 2, and 3. - * we only delete the latest version. One might expect to see only - * versions 1 and 2. HBase differs, and gives us 0, 1 and 2. - * This is okay as well. Since there was no compaction done before the - * delete, version 0 seems to stay on. + /* compactionThreshold is 3. We had inserte a total of 4 versions (0, 1, 2, and 3), + * but family is configured for 3 versions only, thus first one (0) was already overridden + * when we added the fourth (3). Then after deleting the most recent (3), there should be only + * 2 versions (1, 2). */ - testMinorCompactionWithDelete(dc, 3); + testMinorCompactionWithDelete(dc, 2); } @Test