HBASE-13271 Added test for batch operations with validation errors. Updated Javadoc for batch methods.

Javadoc for following methods are updated:
* Table.put(List<Put> puts)
* Table.delete(List<Delete> deletes)

Added @apiNote for delete noting that the input list will not be modified in version 3.0.0

Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Umesh Agashe 2017-09-09 02:10:00 -07:00 committed by Michael Stack
parent 1d278d3de6
commit a11a17825a
4 changed files with 148 additions and 37 deletions

View File

@ -552,6 +552,7 @@ public class HTable implements Table {
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException().initCause(e); throw (InterruptedIOException)new InterruptedIOException().initCause(e);
} finally { } finally {
// TODO: to be consistent with batch put(), do not modify input list
// mutate list so that it is empty for complete success, or contains only failed records // mutate list so that it is empty for complete success, or contains only failed records
// results are returned in the same order as the requests in list walk the list backwards, // results are returned in the same order as the requests in list walk the list backwards,
// so we can remove from list without impacting the indexes of earlier members // so we can remove from list without impacting the indexes of earlier members

View File

@ -148,17 +148,21 @@ public interface Table extends Closeable {
Result get(Get get) throws IOException; Result get(Get get) throws IOException;
/** /**
* Extracts certain cells from the given rows, in batch. * Extracts specified cells from the given rows, as a batch.
* *
* @param gets The objects that specify what data to fetch and from which rows. * @param gets The objects that specify what data to fetch and from which rows.
* @return The data coming from the specified rows, if it exists. If the row specified doesn't * @return The data coming from the specified rows, if it exists. If the row specified doesn't
* exist, the {@link Result} instance returned won't contain any {@link * exist, the {@link Result} instance returned won't contain any {@link
* org.apache.hadoop.hbase.KeyValue}, as indicated by {@link Result#isEmpty()}. If there are any * org.apache.hadoop.hbase.Cell}s, as indicated by {@link Result#isEmpty()}. If there are any
* failures even after retries, there will be a null in the results array for those Gets, AND an * failures even after retries, there will be a <code>null</code> in the results array for those
* exception will be thrown. The ordering of the Result array corresponds to the order of the * Gets, AND an exception will be thrown. The ordering of the Result array corresponds to the order
* list of Get requests. * of the list of passed in Gets.
* @throws IOException if a remote or network exception occurs. * @throws IOException if a remote or network exception occurs.
* @since 0.90.0 * @since 0.90.0
* @apiNote {@link #put(List)} runs pre-flight validations on the input list on the client.
* Currently {@link #get(List)} doesn't run any validations on the client-side; there
* is no need at present, but this may change in the future. An
* {@link IllegalArgumentException} will be thrown in this case.
*/ */
Result[] get(List<Get> gets) throws IOException; Result[] get(List<Get> gets) throws IOException;
@ -207,9 +211,17 @@ public interface Table extends Closeable {
void put(Put put) throws IOException; void put(Put put) throws IOException;
/** /**
* Puts some data in the table, in batch. * Batch puts the specified data into the table.
* <p> * <p>
* This can be used for group commit, or for submitting user defined batches. * This can be used for group commit, or for submitting user defined batches. Before sending
* a batch of mutations to the server, the client runs a few validations on the input list. If an
error is found, for example, a mutation was supplied but was missing its column family, an
* {@link IllegalArgumentException} will be thrown and no mutations will be applied. If there
* are any failures even after retries, a {@link RetriesExhaustedWithDetailsException} will be
* thrown. RetriesExhaustedWithDetailsException contains lists of failed mutations and
* corresponding remote exceptions. The ordering of mutations and exceptions in the
* encapsulating exception corresponds to the order of the input list of Put requests.
*
* @param puts The list of mutations to apply. * @param puts The list of mutations to apply.
* @throws IOException if a remote or network exception occurs. * @throws IOException if a remote or network exception occurs.
* @since 0.20.0 * @since 0.20.0
@ -289,15 +301,27 @@ public interface Table extends Closeable {
void delete(Delete delete) throws IOException; void delete(Delete delete) throws IOException;
/** /**
* Deletes the specified cells/rows in bulk. * Batch Deletes the specified cells/rows from the table.
* @param deletes List of things to delete. List gets modified by this * <p>
method (in particular it gets re-ordered, so the order in which the elements * If a specified row does not exist, {@link Delete} will report as though a successful
* are inserted in the list gives no guarantee as to the order in which the * delete; no exception will be thrown. If there are any failures even after retries,
{@link Delete}s are executed). * a {@link RetriesExhaustedWithDetailsException} will be thrown.
* RetriesExhaustedWithDetailsException contains lists of failed {@link Delete}s and
* corresponding remote exceptions.
*
* @param deletes List of things to delete. The input list gets modified by this
* method. All successfully applied {@link Delete}s in the list are removed (in particular it
* gets re-ordered, so the order in which the elements are inserted in the list gives no
* guarantee as to the order in which the {@link Delete}s are executed).
* @throws IOException if a remote or network exception occurs. In that case * @throws IOException if a remote or network exception occurs. In that case
* the {@code deletes} argument will contain the {@link Delete} instances * the {@code deletes} argument will contain the {@link Delete} instances
* that have not been successfully applied. * that have not been successfully applied.
* @since 0.20.1 * @since 0.20.1
* @apiNote In version 3.0.0, the input list {@code deletes} will no longer be modified. Also,
* {@link #put(List)} runs pre-flight validations on the input list on the client. Currently
* {@link #delete(List)} doesn't run validations on the client; there is no need at present,
* but this may change in the future. An {@link IllegalArgumentException} will be thrown
* in this case.
*/ */
void delete(List<Delete> deletes) throws IOException; void delete(List<Delete> deletes) throws IOException;

View File

@ -76,8 +76,6 @@ import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver; import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.exceptions.ScannerResetException; import org.apache.hadoop.hbase.exceptions.ScannerResetException;
import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
@ -116,9 +114,7 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Rule; import org.junit.Rule;
@ -139,6 +135,7 @@ public class TestFromClientSide {
protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static byte [] ROW = Bytes.toBytes("testRow"); private static byte [] ROW = Bytes.toBytes("testRow");
private static byte [] FAMILY = Bytes.toBytes("testFamily"); private static byte [] FAMILY = Bytes.toBytes("testFamily");
private static final byte[] INVALID_FAMILY = Bytes.toBytes("invalidTestFamily");
private static byte [] QUALIFIER = Bytes.toBytes("testQualifier"); private static byte [] QUALIFIER = Bytes.toBytes("testQualifier");
private static byte [] VALUE = Bytes.toBytes("testValue"); private static byte [] VALUE = Bytes.toBytes("testValue");
protected static int SLAVES = 3; protected static int SLAVES = 3;
@ -173,22 +170,6 @@ public class TestFromClientSide {
TEST_UTIL.shutdownMiniCluster(); TEST_UTIL.shutdownMiniCluster();
} }
/**
* @throws java.lang.Exception
*/
@Before
public void setUp() throws Exception {
// Nothing to do.
}
/**
* @throws java.lang.Exception
*/
@After
public void tearDown() throws Exception {
// Nothing to do.
}
/** /**
* Test append result when there are duplicate rpc request. * Test append result when there are duplicate rpc request.
*/ */
@ -2377,6 +2358,115 @@ public class TestFromClientSide {
} }
} }
/**
 * Tests batch operations (put/get/delete) with a combination of valid and invalid arguments.
 * Covers three behaviors: client-side pre-flight validation failures
 * ({@link IllegalArgumentException}), server-side per-operation failures
 * ({@link RetriesExhaustedWithDetailsException}), and batches against
 * non-existent rows (which succeed without error).
 */
@Test
public void testBatchOperationsWithErrors() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  Table table = TEST_UTIL.createTable(tableName, new byte[][] { FAMILY }, 10);
  // Odd-indexed ops reference INVALID_FAMILY, so exactly half of each batch fails remotely.
  final int NUM_OPS = 100;
  final int FAILED_OPS = NUM_OPS / 2;
  RetriesExhaustedWithDetailsException expectedException = null;
  IllegalArgumentException iae = null;

  // 1.1 Put with no column families: rejected by client-side pre-flight validation with an
  // IllegalArgumentException before any RPC is made; the input list must be left untouched.
  List<Put> puts = new ArrayList<>(NUM_OPS);
  for (int i = 0; i != NUM_OPS; i++) {
    puts.add(new Put(Bytes.toBytes(i)));
  }
  try {
    table.put(puts);
  } catch (IllegalArgumentException e) {
    iae = e;
  }
  assertNotNull(iae);
  assertEquals(NUM_OPS, puts.size());

  // 1.2 Put with an invalid column family on odd rows: the server rejects those mutations
  // and the failures are surfaced as a RetriesExhaustedWithDetailsException whose
  // exceptions/actions lists identify each failed Put.
  iae = null;
  puts.clear();
  for (int i = 0; i != NUM_OPS; i++) {
    Put put = new Put(Bytes.toBytes(i));
    put.addColumn((i % 2) == 0 ? FAMILY : INVALID_FAMILY, FAMILY, Bytes.toBytes(i));
    puts.add(put);
  }
  try {
    table.put(puts);
  } catch (RetriesExhaustedWithDetailsException e) {
    expectedException = e;
  }
  assertNotNull(expectedException);
  assertEquals(FAILED_OPS, expectedException.exceptions.size());
  assertTrue(expectedException.actions.contains(puts.get(1)));

  // 2.1 Get of non-existent rows: no exception is thrown; missing rows yield empty Results.
  List<Get> gets = new ArrayList<>(NUM_OPS);
  for (int i = 0; i < NUM_OPS; i++) {
    gets.add(new Get(Bytes.toBytes(i)));
  }
  Result[] getsResult = table.get(gets);
  assertNotNull(getsResult);
  assertEquals(NUM_OPS, getsResult.length);
  // Row 1 was never written (its put failed in step 1.2), so its Result is empty.
  assertNull(getsResult[1].getRow());

  // 2.2 Get with an invalid column family on odd rows: fails with
  // RetriesExhaustedWithDetailsException and no results are returned.
  gets.clear();
  getsResult = null;
  expectedException = null;
  for (int i = 0; i < NUM_OPS; i++) {
    Get get = new Get(Bytes.toBytes(i));
    get.addColumn((i % 2) == 0 ? FAMILY : INVALID_FAMILY, FAMILY);
    gets.add(get);
  }
  try {
    getsResult = table.get(gets);
  } catch (RetriesExhaustedWithDetailsException e) {
    expectedException = e;
  }
  assertNull(getsResult);
  assertNotNull(expectedException);
  assertEquals(FAILED_OPS, expectedException.exceptions.size());
  assertTrue(expectedException.actions.contains(gets.get(1)));

  // 3.1 Delete with an invalid column family on odd rows: delete(List) removes the
  // successfully applied Deletes from the input list, leaving only the failed ones
  // (documented mutation of the input list; to change in 3.0.0).
  expectedException = null;
  List<Delete> deletes = new ArrayList<>(NUM_OPS);
  for (int i = 0; i < NUM_OPS; i++) {
    Delete delete = new Delete(Bytes.toBytes(i));
    delete.addColumn((i % 2) == 0 ? FAMILY : INVALID_FAMILY, FAMILY);
    deletes.add(delete);
  }
  try {
    table.delete(deletes);
  } catch (RetriesExhaustedWithDetailsException e) {
    expectedException = e;
  }
  assertEquals((NUM_OPS - FAILED_OPS), deletes.size());
  assertNotNull(expectedException);
  assertEquals(FAILED_OPS, expectedException.exceptions.size());
  assertTrue(expectedException.actions.contains(deletes.get(1)));

  // 3.2 Delete of non-existent rows: succeeds without error and empties the input list.
  deletes.clear();
  for (int i = 0; i < NUM_OPS; i++) {
    deletes.add(new Delete(Bytes.toBytes(i)));
  }
  table.delete(deletes);
  assertTrue(deletes.isEmpty());
}
/* /*
* Baseline "scalability" test. * Baseline "scalability" test.
* *
@ -5421,7 +5511,7 @@ public class TestFromClientSide {
List<HRegionLocation> regionsInRange = new ArrayList<>(); List<HRegionLocation> regionsInRange = new ArrayList<>();
byte[] currentKey = startKey; byte[] currentKey = startKey;
final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW); final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW);
try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName);) { try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
do { do {
HRegionLocation regionLocation = r.getRegionLocation(currentKey); HRegionLocation regionLocation = r.getRegionLocation(currentKey);
regionsInRange.add(regionLocation); regionsInRange.add(regionLocation);

View File

@ -22,17 +22,13 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.List; import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.RegionState.State;