HBASE-17276 Only log stacktraces for exceptions once for updates in a batch
For large batches of updates, repeatedly logging the stack traces of WrongRegionExceptions, FailedSanityCheckExceptions, and/or NoSuchColumnFamilyExceptions can easily dominate the contents of a RegionServer log. After the first occurrence of each exception type in a batch has been logged with its full stack trace, switch to logging only the exception's message.
This commit is contained in:
parent
b3ae87bd7d
commit
b554e05410
|
@ -550,6 +550,57 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class that tracks exceptions that have been observed in one batch. Not thread safe.
|
||||||
|
*/
|
||||||
|
static class ObservedExceptionsInBatch {
|
||||||
|
private boolean wrongRegion = false;
|
||||||
|
private boolean failedSanityCheck = false;
|
||||||
|
private boolean wrongFamily = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return If a {@link WrongRegionException} has been observed.
|
||||||
|
*/
|
||||||
|
boolean hasSeenWrongRegion() {
|
||||||
|
return wrongRegion;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records that a {@link WrongRegionException} has been observed.
|
||||||
|
*/
|
||||||
|
void sawWrongRegion() {
|
||||||
|
wrongRegion = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return If a {@link FailedSanityCheckException} has been observed.
|
||||||
|
*/
|
||||||
|
boolean hasSeenFailedSanityCheck() {
|
||||||
|
return failedSanityCheck;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records that a {@link FailedSanityCheckException} has been observed.
|
||||||
|
*/
|
||||||
|
void sawFailedSanityCheck() {
|
||||||
|
failedSanityCheck = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return If a {@link NoSuchColumnFamilyException} has been observed.
|
||||||
|
*/
|
||||||
|
boolean hasSeenNoSuchFamily() {
|
||||||
|
return wrongFamily;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records that a {@link NoSuchColumnFamilyException} has been observed.
|
||||||
|
*/
|
||||||
|
void sawNoSuchFamily() {
|
||||||
|
wrongFamily = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final WriteState writestate = new WriteState();
|
final WriteState writestate = new WriteState();
|
||||||
|
|
||||||
long memstoreFlushSize;
|
long memstoreFlushSize;
|
||||||
|
@ -3107,12 +3158,13 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
||||||
/** Keep track of the locks we hold so we can release them in finally clause */
|
/** Keep track of the locks we hold so we can release them in finally clause */
|
||||||
List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
|
List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
|
||||||
MemstoreSize memstoreSize = new MemstoreSize();
|
MemstoreSize memstoreSize = new MemstoreSize();
|
||||||
|
final ObservedExceptionsInBatch observedExceptions = new ObservedExceptionsInBatch();
|
||||||
try {
|
try {
|
||||||
// STEP 1. Try to acquire as many locks as we can, and ensure we acquire at least one.
|
// STEP 1. Try to acquire as many locks as we can, and ensure we acquire at least one.
|
||||||
int numReadyToWrite = 0;
|
int numReadyToWrite = 0;
|
||||||
long now = EnvironmentEdgeManager.currentTime();
|
long now = EnvironmentEdgeManager.currentTime();
|
||||||
while (lastIndexExclusive < batchOp.operations.length) {
|
while (lastIndexExclusive < batchOp.operations.length) {
|
||||||
if (checkBatchOp(batchOp, lastIndexExclusive, familyMaps, now)) {
|
if (checkBatchOp(batchOp, lastIndexExclusive, familyMaps, now, observedExceptions)) {
|
||||||
lastIndexExclusive++;
|
lastIndexExclusive++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -3477,7 +3529,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkBatchOp(BatchOperation<?> batchOp, final int lastIndexExclusive,
|
private boolean checkBatchOp(BatchOperation<?> batchOp, final int lastIndexExclusive,
|
||||||
final Map<byte[], List<Cell>>[] familyMaps, final long now)
|
final Map<byte[], List<Cell>>[] familyMaps, final long now,
|
||||||
|
final ObservedExceptionsInBatch observedExceptions)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
boolean skip = false;
|
boolean skip = false;
|
||||||
// Skip anything that "ran" already
|
// Skip anything that "ran" already
|
||||||
|
@ -3493,17 +3546,35 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
||||||
try {
|
try {
|
||||||
checkAndPrepareMutation(mutation, batchOp.isInReplay(), familyMap, now);
|
checkAndPrepareMutation(mutation, batchOp.isInReplay(), familyMap, now);
|
||||||
} catch (NoSuchColumnFamilyException nscf) {
|
} catch (NoSuchColumnFamilyException nscf) {
|
||||||
LOG.warn("No such column family in batch mutation", nscf);
|
final String msg = "No such column family in batch mutation. ";
|
||||||
|
if (observedExceptions.hasSeenNoSuchFamily()) {
|
||||||
|
LOG.warn(msg + nscf.getMessage());
|
||||||
|
} else {
|
||||||
|
LOG.warn(msg, nscf);
|
||||||
|
observedExceptions.sawNoSuchFamily();
|
||||||
|
}
|
||||||
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
||||||
OperationStatusCode.BAD_FAMILY, nscf.getMessage());
|
OperationStatusCode.BAD_FAMILY, nscf.getMessage());
|
||||||
skip = true;
|
skip = true;
|
||||||
} catch (FailedSanityCheckException fsce) {
|
} catch (FailedSanityCheckException fsce) {
|
||||||
LOG.warn("Batch Mutation did not pass sanity check", fsce);
|
final String msg = "Batch Mutation did not pass sanity check. ";
|
||||||
|
if (observedExceptions.hasSeenFailedSanityCheck()) {
|
||||||
|
LOG.warn(msg + fsce.getMessage());
|
||||||
|
} else {
|
||||||
|
LOG.warn(msg, fsce);
|
||||||
|
observedExceptions.sawFailedSanityCheck();
|
||||||
|
}
|
||||||
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
||||||
OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage());
|
OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage());
|
||||||
skip = true;
|
skip = true;
|
||||||
} catch (WrongRegionException we) {
|
} catch (WrongRegionException we) {
|
||||||
LOG.warn("Batch mutation had a row that does not belong to this region", we);
|
final String msg = "Batch mutation had a row that does not belong to this region. ";
|
||||||
|
if (observedExceptions.hasSeenWrongRegion()) {
|
||||||
|
LOG.warn(msg + we.getMessage());
|
||||||
|
} else {
|
||||||
|
LOG.warn(msg, we);
|
||||||
|
observedExceptions.sawWrongRegion();
|
||||||
|
}
|
||||||
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
|
||||||
OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage());
|
OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage());
|
||||||
skip = true;
|
skip = true;
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegion.ObservedExceptionsInBatch;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test class for {@link ObservedExceptionsInBatch}.
|
||||||
|
*/
|
||||||
|
@Category(SmallTests.class)
|
||||||
|
public class TestObservedExceptionsInBatch {
|
||||||
|
|
||||||
|
private ObservedExceptionsInBatch observedExceptions;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() {
|
||||||
|
observedExceptions = new ObservedExceptionsInBatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoObservationsOnCreation() {
|
||||||
|
assertFalse(observedExceptions.hasSeenFailedSanityCheck());
|
||||||
|
assertFalse(observedExceptions.hasSeenNoSuchFamily());
|
||||||
|
assertFalse(observedExceptions.hasSeenWrongRegion());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testObservedAfterRecording() {
|
||||||
|
observedExceptions.sawFailedSanityCheck();
|
||||||
|
assertTrue(observedExceptions.hasSeenFailedSanityCheck());
|
||||||
|
observedExceptions.sawNoSuchFamily();
|
||||||
|
assertTrue(observedExceptions.hasSeenNoSuchFamily());
|
||||||
|
observedExceptions.sawWrongRegion();
|
||||||
|
assertTrue(observedExceptions.hasSeenWrongRegion());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue