HBASE-15484 Correct the semantic of batch and partial - amend to fix bug and revise the JavaDoc for related APIs.

This commit is contained in:
Phil Yang 2017-03-07 22:27:06 +08:00
parent 6afe696e53
commit f45d261902
4 changed files with 39 additions and 21 deletions

View File

@ -668,6 +668,11 @@ public abstract class ClientScanner extends AbstractClientScanner {
} }
} }
partialResultsCellSizes -= scan.getBatch(); partialResultsCellSizes -= scan.getBatch();
if (partialResultsCellSizes == 0) {
// We have nothing in partialResults, clear the flags to prevent returning empty Result
// when next result belongs to the next row.
clearPartialResults();
}
return Result.create(cells, null, stale, return Result.create(cells, null, stale,
partialResultsCellSizes > 0 || result.mayHaveMoreCellsInRow()); partialResultsCellSizes > 0 || result.mayHaveMoreCellsInRow());
} }

View File

@ -968,10 +968,6 @@ public class Result implements CellScannable, CellScanner {
} }
/** /**
* Whether or not the result is a partial result. Partial results contain a subset of the cells
* for a row and should be combined with a result representing the remaining cells in that row to
* form a complete (non-partial) result.
* @return Whether or not the result is a partial result
* @deprecated the word 'partial' ambiguous, use {@link #mayHaveMoreCellsInRow()} instead. * @deprecated the word 'partial' ambiguous, use {@link #mayHaveMoreCellsInRow()} instead.
* Deprecated since 1.4.0. * Deprecated since 1.4.0.
* @see #mayHaveMoreCellsInRow() * @see #mayHaveMoreCellsInRow()
@ -982,13 +978,12 @@ public class Result implements CellScannable, CellScanner {
} }
/** /**
* For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM. * For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM
* This flag is used to tell you if the current Result is the last one of the current row. False * or timeout. This flag is used to tell you if the current Result is the last one of the current
* means this Result is the last one. True means there are be more cells for the current row. * row. False means this Result is the last one. True means there MAY be more cells belonging to
* <p> * the current row.
* The Scan configuration used to control the result size on the server is * If you don't use {@link Scan#setAllowPartialResults(boolean)} or {@link Scan#setBatch(int)},
* {@link Scan#setMaxResultSize(long)} and the default value can be seen here: * this method will always return false because the Result must contain all cells in one Row.
* {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE}
*/ */
public boolean mayHaveMoreCellsInRow() { public boolean mayHaveMoreCellsInRow() {
return mayHaveMoreCellsInRow; return mayHaveMoreCellsInRow;

View File

@ -595,14 +595,13 @@ public class Scan extends Query {
} }
/** /**
* Set the maximum number of values to return for each call to next(). * Set the maximum number of cells to return for each call to next(). Callers should be aware
* Callers should be aware that invoking this method with any value * that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}.
* is equivalent to calling {@link #setAllowPartialResults(boolean)} * If you don't allow partial results, the number of cells in each Result must equal your
* with a value of {@code true}; partial results may be returned if * batch setting unless it is the last Result for current row. So this method is helpful in paging
* this method is called. Use {@link #setMaxResultSize(long)}} to * queries. If you just want to prevent OOM at client, using setAllowPartialResults(true) is better.
* limit the size of a Scan's Results instead.
*
* @param batch the maximum number of cells * @param batch the maximum number of cells
* @see Result#mayHaveMoreCellsInRow()
*/ */
public Scan setBatch(int batch) { public Scan setBatch(int batch) {
if (this.hasFilter() && this.filter.hasFilterRow()) { if (this.hasFilter() && this.filter.hasFilterRow()) {
@ -847,11 +846,14 @@ public class Scan extends Query {
} }
/** /**
* Setting whether the caller wants to see the partial results that may be returned from the * Setting whether the caller wants to see the partial results when server returns
* server. By default this value is false and the complete results will be assembled client side * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client.
* By default this value is false and the complete results will be assembled client side
* before being delivered to the caller. * before being delivered to the caller.
* @param allowPartialResults * @param allowPartialResults
* @return this * @return this
* @see Result#mayHaveMoreCellsInRow()
* @see #setBatch(int)
*/ */
public Scan setAllowPartialResults(final boolean allowPartialResults) { public Scan setAllowPartialResults(final boolean allowPartialResults) {
this.allowPartialResults = allowPartialResults; this.allowPartialResults = allowPartialResults;

View File

@ -1074,4 +1074,20 @@ public class TestPartialResultsFromClientSide {
scanner.close(); scanner.close();
} }
} @Test
public void testMayHaveMoreCellsInRowReturnsTrueAndSetBatch() throws IOException {
  // Scans a freshly created table with batch = 1 and a FirstKeyOnlyFilter, and checks that
  // every Result handed back by the scanner carries exactly one cell, i.e. the scanner never
  // returns an empty Result.
  // NOTE(review): presumably guards the empty-Result case when the next result belongs to the
  // next row -- confirm against the ClientScanner change in this commit.
  Table table = createTestTable(TableName.valueOf(
      "testMayHaveMoreCellsInRowReturnsTrueAndSetBatch"), ROWS, FAMILIES,
      QUALIFIERS, VALUE);
  Scan scan = new Scan();
  scan.setBatch(1);
  scan.setFilter(new FirstKeyOnlyFilter());
  ResultScanner scanner = table.getScanner(scan);
  try {
    Result result;
    while ((result = scanner.next()) != null) {
      assertTrue(result.rawCells() != null);
      assertEquals(1, result.rawCells().length);
    }
  } finally {
    // Always release the scanner, even when an assertion above fails part-way through.
    scanner.close();
  }
}
}