From f4cec2e2027baab07537d7e193105b21ca5dc02a Mon Sep 17 00:00:00 2001 From: Gary Helmling Date: Wed, 22 Jun 2016 17:08:28 -0700 Subject: [PATCH] HBASE-16085 Add a metric for failed compactions --- .../regionserver/MetricsRegionSource.java | 2 + .../regionserver/MetricsRegionWrapper.java | 7 +++ .../regionserver/MetricsRegionSourceImpl.java | 4 ++ .../TestMetricsRegionSourceImpl.java | 5 +++ .../regionserver/CompactSplitThread.java | 2 + .../hadoop/hbase/regionserver/HRegion.java | 10 ++++- .../MetricsRegionWrapperImpl.java | 5 +++ .../MetricsRegionWrapperStub.java | 5 +++ .../hbase/regionserver/TestCompaction.java | 43 +++++++++++++++++++ 9 files changed, 81 insertions(+), 2 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSource.java index 8dc7e113e17..911c75708a4 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSource.java @@ -28,9 +28,11 @@ public interface MetricsRegionSource extends Comparable { String OPS_SAMPLE_NAME = "ops"; String SIZE_VALUE_NAME = "size"; String COMPACTIONS_COMPLETED_COUNT = "compactionsCompletedCount"; + String COMPACTIONS_FAILED_COUNT = "compactionsFailedCount"; String NUM_BYTES_COMPACTED_COUNT = "numBytesCompactedCount"; String NUM_FILES_COMPACTED_COUNT = "numFilesCompactedCount"; String COMPACTIONS_COMPLETED_DESC = "Number of compactions that have completed."; + String COMPACTIONS_FAILED_DESC = "Number of compactions that have failed."; String NUM_BYTES_COMPACTED_DESC = "Sum of filesize on all files entering a finished, successful or aborted, compaction"; String NUM_FILES_COMPACTED_DESC = diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java index a7c7096ab90..e3fd5c34d98 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapper.java @@ -105,6 +105,13 @@ public interface MetricsRegionWrapper { long getNumCompactionsCompleted(); + /** + * Returns the total number of compactions that have been reported as failed on this region. + * Note that a given compaction can be reported as both completed and failed if an exception + * is thrown in the processing after {@code HRegion.compact()}. + */ + long getNumCompactionsFailed(); + int getRegionHashCode(); /** diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java index de46ac779e2..c0d71d5e813 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java @@ -264,6 +264,10 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource { regionNamePrefix + MetricsRegionSource.COMPACTIONS_COMPLETED_COUNT, MetricsRegionSource.COMPACTIONS_COMPLETED_DESC), this.regionWrapper.getNumCompactionsCompleted()); + mrb.addCounter(Interns.info( + regionNamePrefix + MetricsRegionSource.COMPACTIONS_FAILED_COUNT, + MetricsRegionSource.COMPACTIONS_FAILED_DESC), + this.regionWrapper.getNumCompactionsFailed()); mrb.addCounter(Interns.info( regionNamePrefix + MetricsRegionSource.NUM_BYTES_COMPACTED_COUNT, MetricsRegionSource.NUM_BYTES_COMPACTED_DESC), diff --git a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java index 4f5a8bd14e5..cb97570c245 100644 --- a/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java +++ b/hbase-hadoop2-compat/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionSourceImpl.java @@ -151,6 +151,11 @@ public class TestMetricsRegionSourceImpl { return 0; } + @Override + public long getNumCompactionsFailed() { + return 0; + } + @Override public int getRegionHashCode() { return regionName.hashCode(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java index 579f78ed516..c1f82b98ade 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java @@ -547,9 +547,11 @@ public class CompactSplitThread implements CompactionRequestor, PropagatingConfi if (remoteEx != ex) { LOG.info("Compaction failed at original callstack: " + formatStackTrace(ex)); } + region.reportCompactionRequestFailure(); server.checkFileSystem(); } catch (Exception ex) { LOG.error("Compaction failed " + this, ex); + region.reportCompactionRequestFailure(); server.checkFileSystem(); } finally { LOG.debug("CompactSplitThread Status: " + CompactSplitThread.this); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 1e007e71424..9f6a03ae101 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -279,6 +279,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // Compaction counters final AtomicLong compactionsFinished = new AtomicLong(0L); + final AtomicLong compactionsFailed = new AtomicLong(0L); final AtomicLong compactionNumFilesCompacted = new AtomicLong(0L); final AtomicLong compactionNumBytesCompacted = new AtomicLong(0L); @@ -7507,7 +7508,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 47 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + + 48 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + (14 * Bytes.SIZEOF_LONG) + 5 * Bytes.SIZEOF_BOOLEAN); @@ -7524,7 +7525,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long DEEP_OVERHEAD = FIXED_OVERHEAD + ClassSize.OBJECT + // closeLock (2 * ClassSize.ATOMIC_BOOLEAN) + // closed, closing - (3 * ClassSize.ATOMIC_LONG) + // memStoreSize, numPutsWithoutWAL, dataInMemoryWithoutWAL + (4 * ClassSize.ATOMIC_LONG) + // memStoreSize, numPutsWithoutWAL, dataInMemoryWithoutWAL, + // compactionsFailed (2 * ClassSize.CONCURRENT_HASHMAP) + // lockedRows, scannerReadPoints WriteState.HEAP_SIZE + // writestate ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + // stores @@ -7973,6 +7975,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi assert newValue >= 0; } + public void reportCompactionRequestFailure() { + compactionsFailed.incrementAndGet(); + } + @VisibleForTesting public long getReadPoint() { return getReadPoint(IsolationLevel.READ_COMMITTED); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java index f9e01cd7a6a..01e6d06eb9f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperImpl.java @@ -140,6 +140,11 @@ public class MetricsRegionWrapperImpl implements MetricsRegionWrapper, Closeable return this.region.compactionsFinished.get(); } + @Override + public long getNumCompactionsFailed() { + return this.region.compactionsFailed.get(); + } + @Override public long getMaxStoreFileAge() { return maxStoreFileAge; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java index 1ab44eced97..72b8b6420f2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionWrapperStub.java @@ -120,6 +120,11 @@ public class MetricsRegionWrapperStub implements MetricsRegionWrapper { return 0; } + @Override + public long getNumCompactionsFailed() { + return 0; + } + @Override public int getRegionHashCode() { return 42; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java index d7cb4a17439..fa630a21d4c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java @@ -36,6 +36,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -297,6 +298,48 @@ public class TestCompaction { thread.interruptIfNecessary(); } + @Test + public void testCompactionFailure() throws Exception { + // setup a compact/split thread on a mock server + HRegionServer mockServer = Mockito.mock(HRegionServer.class); + Mockito.when(mockServer.getConfiguration()).thenReturn(r.getBaseConf()); + CompactSplitThread thread = new CompactSplitThread(mockServer); + Mockito.when(mockServer.getCompactSplitThread()).thenReturn(thread); + + // setup a region/store with some files + Store store = r.getStore(COLUMN_FAMILY); + createStoreFile(r); + for (int i = 0; i < HStore.DEFAULT_BLOCKING_STOREFILE_COUNT - 1; i++) { + createStoreFile(r); + } + + HRegion mockRegion = Mockito.spy(r); + Mockito.when(mockRegion.checkSplit()).thenThrow(new IndexOutOfBoundsException()); + + MetricsRegionWrapper metricsWrapper = new MetricsRegionWrapperImpl(r); + + long preCompletedCount = metricsWrapper.getNumCompactionsCompleted(); + long preFailedCount = metricsWrapper.getNumCompactionsFailed(); + + CountDownLatch latch = new CountDownLatch(1); + TrackableCompactionRequest request = new TrackableCompactionRequest(latch); + thread.requestCompaction(mockRegion, store, "test custom comapction", Store.PRIORITY_USER, + request, null); + // wait for the latch to complete. + latch.await(120, TimeUnit.SECONDS); + + // compaction should have completed and been marked as failed due to error in split request + long postCompletedCount = metricsWrapper.getNumCompactionsCompleted(); + long postFailedCount = metricsWrapper.getNumCompactionsFailed(); + + assertTrue("Completed count should have increased (pre=" + preCompletedCount + + ", post="+postCompletedCount+")", + postCompletedCount > preCompletedCount); + assertTrue("Failed count should have increased (pre=" + preFailedCount + + ", post=" + postFailedCount + ")", + postFailedCount > preFailedCount); + } + /** * HBASE-7947: Regression test to ensure adding to the correct list in the * {@link CompactSplitThread}