From b635414e8337be7bc14ca6ae605749c35569b4f7 Mon Sep 17 00:00:00 2001 From: Phil Yang Date: Tue, 7 Mar 2017 22:23:56 +0800 Subject: [PATCH] HBASE-15484 Correct the semantic of batch and partial - amend to fix bug and revise the JavaDoc for related APIs. --- .../hadoop/hbase/client/ClientScanner.java | 5 +++++ .../apache/hadoop/hbase/client/Result.java | 17 ++++++---------- .../org/apache/hadoop/hbase/client/Scan.java | 20 ++++++++++--------- .../TestPartialResultsFromClientSide.java | 15 ++++++++++++++ 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java index b11a84131d3..9be4d6dd4d8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java @@ -672,6 +672,11 @@ public abstract class ClientScanner extends AbstractClientScanner { } } partialResultsCellSizes -= scan.getBatch(); + if (partialResultsCellSizes == 0) { + // We have nothing in partialResults, clear the flags to prevent returning empty Result + // when next result belongs to the next row. + clearPartialResults(); + } return Result.create(cells, null, stale, partialResultsCellSizes > 0 || result.mayHaveMoreCellsInRow()); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java index 0fadbd9e3e3..b4c24fe59d0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java @@ -906,10 +906,6 @@ public class Result implements CellScannable, CellScanner { } /** - * Whether or not the result is a partial result. Partial results contain a subset of the cells - * for a row and should be combined with a result representing the remaining cells in that row to - * form a complete (non-partial) result. - * @return Whether or not the result is a partial result * @deprecated the word 'partial' ambiguous, use {@link #mayHaveMoreCellsInRow()} instead. * Deprecated since 1.4.0. * @see #mayHaveMoreCellsInRow() @@ -920,13 +916,12 @@ public class Result implements CellScannable, CellScanner { } /** - * For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM. - * This flag is used to tell you if the current Result is the last one of the current row. False - * means this Result is the last one. True means there are be more cells for the current row. - *

- * The Scan configuration used to control the result size on the server is - * {@link Scan#setMaxResultSize(long)} and the default value can be seen here: - * {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE} + * For scanning large rows, the RS may choose to return the cells chunk by chunk to prevent OOM + * or timeout. This flag is used to tell you if the current Result is the last one of the current + * row. False means this Result is the last one. True means there MAY be more cells belonging to + * the current row. + * If you don't use {@link Scan#setAllowPartialResults(boolean)} or {@link Scan#setBatch(int)}, + * this method will always return false because the Result must contains all cells in one Row. */ public boolean mayHaveMoreCellsInRow() { return mayHaveMoreCellsInRow; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java index c4b704495d5..0f9a9af0307 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java @@ -610,14 +610,13 @@ public class Scan extends Query { } /** - * Set the maximum number of values to return for each call to next(). - * Callers should be aware that invoking this method with any value - * is equivalent to calling {@link #setAllowPartialResults(boolean)} - * with a value of {@code true}; partial results may be returned if - * this method is called. Use {@link #setMaxResultSize(long)}} to - * limit the size of a Scan's Results instead. - * + * Set the maximum number of cells to return for each call to next(). Callers should be aware + * that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}. + * If you don't allow partial results, the number of cells in each Result must equal to your + * batch setting unless it is the last Result for current row. So this method is helpful in paging + * queries. If you just want to prevent OOM at client, use setAllowPartialResults(true) is better. * @param batch the maximum number of values + * @see Result#mayHaveMoreCellsInRow() */ public Scan setBatch(int batch) { if (this.hasFilter() && this.filter.hasFilterRow()) { @@ -855,11 +854,14 @@ public class Scan extends Query { } /** - * Setting whether the caller wants to see the partial results that may be returned from the - * server. By default this value is false and the complete results will be assembled client side + * Setting whether the caller wants to see the partial results when server returns + * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client. + * By default this value is false and the complete results will be assembled client side * before being delivered to the caller. * @param allowPartialResults * @return this + * @see Result#mayHaveMoreCellsInRow() + * @see #setBatch(int) */ public Scan setAllowPartialResults(final boolean allowPartialResults) { this.allowPartialResults = allowPartialResults; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java index aeb30c29fd6..57248b6d0d5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java @@ -1080,4 +1080,19 @@ public class TestPartialResultsFromClientSide { scanner.close(); } + @Test + public void testMayHaveMoreCellsInRowReturnsTrueAndSetBatch() throws IOException { + Table table = createTestTable(TableName.valueOf(name.getMethodName()), ROWS, FAMILIES, + QUALIFIERS, VALUE); + Scan scan = new Scan(); + scan.setBatch(1); + scan.setFilter(new FirstKeyOnlyFilter()); + ResultScanner scanner = table.getScanner(scan); + Result result; + while ((result = scanner.next()) != null) { + assertTrue(result.rawCells() != null); + assertEquals(1, result.rawCells().length); + } + } + } \ No newline at end of file