HBASE-15484 Correct the semantic of batch and partial - amend to fix bug and revise the JavaDoc for related APIs.

This commit is contained in:
Phil Yang 2017-03-07 22:27:06 +08:00
parent 6afe696e53
commit f45d261902
4 changed files with 39 additions and 21 deletions

View File

@ -668,6 +668,11 @@ public abstract class ClientScanner extends AbstractClientScanner {
}
}
partialResultsCellSizes -= scan.getBatch();
if (partialResultsCellSizes == 0) {
// We have nothing in partialResults, clear the flags to prevent returning empty Result
// when next result belongs to the next row.
clearPartialResults();
}
return Result.create(cells, null, stale,
partialResultsCellSizes > 0 || result.mayHaveMoreCellsInRow());
}

View File

@ -968,10 +968,6 @@ public class Result implements CellScannable, CellScanner {
}
/**
* Whether or not the result is a partial result. Partial results contain a subset of the cells
* for a row and should be combined with a result representing the remaining cells in that row to
* form a complete (non-partial) result.
* @return Whether or not the result is a partial result
* @deprecated the word 'partial' ambiguous, use {@link #mayHaveMoreCellsInRow()} instead.
* Deprecated since 1.4.0.
* @see #mayHaveMoreCellsInRow()
@ -982,13 +978,12 @@ public class Result implements CellScannable, CellScanner {
}
/**
* For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM.
* This flag is used to tell you if the current Result is the last one of the current row. False
* means this Result is the last one. True means there are be more cells for the current row.
* <p>
* The Scan configuration used to control the result size on the server is
* {@link Scan#setMaxResultSize(long)} and the default value can be seen here:
* {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE}
* For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM
* or timeout. This flag is used to tell you if the current Result is the last one of the current
* row. False means this Result is the last one. True means there MAY be more cells belonging to
* the current row.
* If you don't use {@link Scan#setAllowPartialResults(boolean)} or {@link Scan#setBatch(int)},
* this method will always return false because the Result must contains all cells in one Row.
*/
public boolean mayHaveMoreCellsInRow() {
return mayHaveMoreCellsInRow;

View File

@ -595,14 +595,13 @@ public class Scan extends Query {
}
/**
* Set the maximum number of values to return for each call to next().
* Callers should be aware that invoking this method with any value
* is equivalent to calling {@link #setAllowPartialResults(boolean)}
* with a value of {@code true}; partial results may be returned if
* this method is called. Use {@link #setMaxResultSize(long)}} to
* limit the size of a Scan's Results instead.
*
* Set the maximum number of cells to return for each call to next(). Callers should be aware
* that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}.
* If you don't allow partial results, the number of cells in each Result must equal to your
* batch setting unless it is the last Result for current row. So this method is helpful in paging
* queries. If you just want to prevent OOM at client, use setAllowPartialResults(true) is better.
* @param batch the maximum number of values
* @see Result#mayHaveMoreCellsInRow()
*/
public Scan setBatch(int batch) {
if (this.hasFilter() && this.filter.hasFilterRow()) {
@ -847,11 +846,14 @@ public class Scan extends Query {
}
/**
* Setting whether the caller wants to see the partial results that may be returned from the
* server. By default this value is false and the complete results will be assembled client side
* Setting whether the caller wants to see the partial results when server returns
* less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client.
* By default this value is false and the complete results will be assembled client side
* before being delivered to the caller.
* @param allowPartialResults
* @return this
* @see Result#mayHaveMoreCellsInRow()
* @see #setBatch(int)
*/
public Scan setAllowPartialResults(final boolean allowPartialResults) {
this.allowPartialResults = allowPartialResults;

View File

@ -1074,4 +1074,20 @@ public class TestPartialResultsFromClientSide {
scanner.close();
}
}
@Test
public void testMayHaveMoreCellsInRowReturnsTrueAndSetBatch() throws IOException {
Table table = createTestTable(TableName.valueOf(
"testMayHaveMoreCellsInRowReturnsTrueAndSetBatch"), ROWS, FAMILIES,
QUALIFIERS, VALUE);
Scan scan = new Scan();
scan.setBatch(1);
scan.setFilter(new FirstKeyOnlyFilter());
ResultScanner scanner = table.getScanner(scan);
Result result;
while ((result = scanner.next()) != null) {
assertTrue(result.rawCells() != null);
assertEquals(1, result.rawCells().length);
}
}
}