HBASE-8930 REAPPLY with testfix

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1521354 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
larsh 2013-09-10 04:56:32 +00:00
parent 916366615b
commit da4167c42b
7 changed files with 332 additions and 88 deletions

View File

@ -48,25 +48,44 @@ import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
public interface ColumnTracker { public interface ColumnTracker {
/** /**
* Keeps track of the number of versions for the columns asked for * Checks if the column is present in the list of requested columns by returning the match code
* instance. It does not check against the number of versions for the columns asked for. To do the
* version check, one has to call {@link #checkVersions(byte[], int, int, long, byte, boolean)}
* method based on the return type (INCLUDE) of this method. The values that can be returned by
* this method are {@link MatchCode#INCLUDE}, {@link MatchCode#SEEK_NEXT_COL} and
* {@link MatchCode#SEEK_NEXT_ROW}.
* @param bytes * @param bytes
* @param offset * @param offset
* @param length * @param length
* @param ttl The timeToLive to enforce.
* @param type The type of the KeyValue * @param type The type of the KeyValue
* @param ignoreCount indicates if the KV needs to be excluded while counting
* (used during compactions. We only count KV's that are older than all the
* scanners' read points.)
* @return The match code instance. * @return The match code instance.
* @throws IOException in case there is an internal consistency problem * @throws IOException in case there is an internal consistency problem caused by a data
* caused by a data corruption. * corruption.
*/ */
ScanQueryMatcher.MatchCode checkColumn( ScanQueryMatcher.MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
byte[] bytes, int offset, int length, long ttl, byte type, boolean ignoreCount
)
throws IOException; throws IOException;
/**
* Keeps track of the number of versions for the columns asked for. It assumes that the user has
* already checked if the keyvalue needs to be included by calling the
* {@link #checkColumn(byte[], int, int, byte)} method. The enum values returned by this method
* are {@link MatchCode#SKIP}, {@link MatchCode#INCLUDE},
* {@link MatchCode#INCLUDE_AND_SEEK_NEXT_COL} and {@link MatchCode#INCLUDE_AND_SEEK_NEXT_ROW}.
* Implementations which include all the columns could just return {@link MatchCode#INCLUDE} in
* the {@link #checkColumn(byte[], int, int, byte)} method and perform all the operations in this
* checkVersions method.
* @param type the type of the key value (Put/Delete)
* @param ttl The timeToLive to enforce.
* @param ignoreCount indicates if the KV needs to be excluded while counting (used during
* compactions. We only count KV's that are older than all the scanners' read points.)
* @return the scan query matcher match code instance
* @throws IOException in case there is an internal consistency problem caused by a data
* corruption.
*/
ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length, long ttl,
byte type, boolean ignoreCount) throws IOException;
/** /**
* Resets the Matcher * Resets the Matcher
*/ */

View File

@ -18,6 +18,7 @@
*/ */
package org.apache.hadoop.hbase.regionserver; package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.NavigableSet; import java.util.NavigableSet;
@ -106,7 +107,7 @@ public class ExplicitColumnTracker implements ColumnTracker {
*/ */
@Override @Override
public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset, public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
int length, long timestamp, byte type, boolean ignoreCount) { int length, byte type) {
// delete markers should never be passed to an // delete markers should never be passed to an
// *Explicit*ColumnTracker // *Explicit*ColumnTracker
assert !KeyValue.isDelete(type); assert !KeyValue.isDelete(type);
@ -125,34 +126,9 @@ public class ExplicitColumnTracker implements ColumnTracker {
int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(), int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
column.getLength(), bytes, offset, length); column.getLength(), bytes, offset, length);
// Column Matches. If it is not a duplicate key, increment the version count // Column Matches. Return include code. The caller would call checkVersions
// and include. // to limit the number of versions.
if(ret == 0) { if(ret == 0) {
if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
//If column matches, check if it is a duplicate timestamp
if (sameAsPreviousTS(timestamp)) {
//If duplicate, skip this Key
return ScanQueryMatcher.MatchCode.SKIP;
}
int count = this.column.increment();
if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
// Done with versions for this column
++this.index;
resetTS();
if (done()) {
// We have served all the requested columns.
this.column = null;
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
} else {
// We are done with current column; advance to next column
// of interest.
this.column = this.columns.get(this.index);
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
}
} else {
setTS(timestamp);
}
return ScanQueryMatcher.MatchCode.INCLUDE; return ScanQueryMatcher.MatchCode.INCLUDE;
} }
@ -180,6 +156,35 @@ public class ExplicitColumnTracker implements ColumnTracker {
} while(true); } while(true);
} }
@Override
public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
long timestamp, byte type, boolean ignoreCount) throws IOException {
assert !KeyValue.isDelete(type);
if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
// Check if it is a duplicate timestamp
if (sameAsPreviousTS(timestamp)) {
// If duplicate, skip this Key
return ScanQueryMatcher.MatchCode.SKIP;
}
int count = this.column.increment();
if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
// Done with versions for this column
++this.index;
resetTS();
if (done()) {
// We have served all the requested columns.
this.column = null;
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
}
// We are done with current column; advance to next column
// of interest.
this.column = this.columns.get(this.index);
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
}
setTS(timestamp);
return ScanQueryMatcher.MatchCode.INCLUDE;
}
// Called between every row. // Called between every row.
public void reset() { public void reset() {
this.index = 0; this.index = 0;

View File

@ -369,42 +369,60 @@ public class ScanQueryMatcher {
return columns.getNextRowOrNextColumn(bytes, offset, qualLength); return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
} }
/** // STEP 1: Check if the column is part of the requested columns
* Filters should be checked before checking column trackers. If we do MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, type);
* otherwise, as was previously being done, ColumnTracker may increment its if (colChecker == MatchCode.INCLUDE) {
* counter for even that KV which may be discarded later on by Filter. This ReturnCode filterResponse = ReturnCode.SKIP;
* would lead to incorrect results in certain cases. // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
*/ if (filter != null) {
ReturnCode filterResponse = ReturnCode.SKIP; // STEP 3: Filter the key value and return if it filters out
if (filter != null) { filterResponse = filter.filterKeyValue(kv);
filterResponse = filter.filterKeyValue(kv); switch (filterResponse) {
if (filterResponse == ReturnCode.SKIP) { case SKIP:
return MatchCode.SKIP; return MatchCode.SKIP;
} else if (filterResponse == ReturnCode.NEXT_COL) { case NEXT_COL:
return columns.getNextRowOrNextColumn(bytes, offset, qualLength); return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
} else if (filterResponse == ReturnCode.NEXT_ROW) { case NEXT_ROW:
stickyNextRow = true; stickyNextRow = true;
return MatchCode.SEEK_NEXT_ROW; return MatchCode.SEEK_NEXT_ROW;
} else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) { case SEEK_NEXT_USING_HINT:
return MatchCode.SEEK_NEXT_USING_HINT; return MatchCode.SEEK_NEXT_USING_HINT;
default:
//It means it is either include or include and seek next
break;
}
} }
/*
* STEP 4: Reaching this step means the column is part of the requested columns and either
* the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
* Now check the number of versions needed. This method call returns SKIP, INCLUDE,
* INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
*
* FilterResponse ColumnChecker Desired behavior
* INCLUDE SKIP row has already been included, SKIP.
* INCLUDE INCLUDE INCLUDE
* INCLUDE INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL
* INCLUDE INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW
* INCLUDE_AND_SEEK_NEXT_COL SKIP row has already been included, SKIP.
* INCLUDE_AND_SEEK_NEXT_COL INCLUDE INCLUDE_AND_SEEK_NEXT_COL
* INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL
* INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW INCLUDE_AND_SEEK_NEXT_ROW
*
* In all the above scenarios, we return the column checker return value except for
* FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
*/
colChecker =
columns.checkVersions(bytes, offset, qualLength, timestamp, type,
kv.getMvccVersion() > maxReadPointToTrackVersions);
//Optimize with stickyNextRow
stickyNextRow = colChecker == MatchCode.INCLUDE_AND_SEEK_NEXT_ROW ? true : stickyNextRow;
return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
: colChecker;
} }
stickyNextRow = (colChecker == MatchCode.SEEK_NEXT_ROW) ? true
MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, : stickyNextRow;
timestamp, type, kv.getMvccVersion() > maxReadPointToTrackVersions);
/*
* According to current implementation, colChecker can only be
* SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
* the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
*/
if (colChecker == MatchCode.SEEK_NEXT_ROW) {
stickyNextRow = true;
} else if (filter != null && colChecker == MatchCode.INCLUDE &&
filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL) {
return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
}
return colChecker; return colChecker;
} }
/** Handle partial-drop-deletes. As we match keys in order, when we have a range from which /** Handle partial-drop-deletes. As we match keys in order, when we have a range from which
@ -511,6 +529,16 @@ public class ScanQueryMatcher {
null, 0, 0); null, 0, 0);
} }
//Used only for testing purposes
static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset,
int length, long ttl, byte type, boolean ignoreCount) throws IOException {
MatchCode matchCode = columnTracker.checkColumn(bytes, offset, length, type);
if (matchCode == MatchCode.INCLUDE) {
return columnTracker.checkVersions(bytes, offset, length, ttl, type, ignoreCount);
}
return matchCode;
}
/** /**
* {@link #match} return codes. These instruct the scanner moving through * {@link #match} return codes. These instruct the scanner moving through
* memstores and StoreFiles what to do with the current KeyValue. * memstores and StoreFiles what to do with the current KeyValue.

View File

@ -62,13 +62,22 @@ public class ScanWildcardColumnTracker implements ColumnTracker {
/** /**
* {@inheritDoc} * {@inheritDoc}
* This receives puts *and* deletes. * This receives puts *and* deletes.
* Deletes do not count as a version, but rather take the version
* of the previous put (so eventually all but the last can be reclaimed).
*/ */
@Override @Override
public MatchCode checkColumn(byte[] bytes, int offset, int length, public MatchCode checkColumn(byte[] bytes, int offset, int length, byte type)
throws IOException {
return MatchCode.INCLUDE;
}
/**
* {@inheritDoc}
* This receives puts *and* deletes. Deletes do not count as a version, but rather
* take the version of the previous put (so eventually all but the last can be reclaimed).
*/
@Override
public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
long timestamp, byte type, boolean ignoreCount) throws IOException { long timestamp, byte type, boolean ignoreCount) throws IOException {
if (columnBuffer == null) { if (columnBuffer == null) {
// first iteration. // first iteration.
resetBuffer(bytes, offset, length); resetBuffer(bytes, offset, length);
@ -176,7 +185,6 @@ public class ScanWildcardColumnTracker implements ColumnTracker {
return null; return null;
} }
/** /**
* We can never know a-priori if we are done, so always return false. * We can never know a-priori if we are done, so always return false.
* @return false * @return false

View File

@ -0,0 +1,182 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
* Test the invocation logic of the filters. A filter must be invoked only for
* the columns that are requested for.
*/
@Category(SmallTests.class)
public class TestInvocationRecordFilter {

  private static final byte[] TABLE_NAME_BYTES = Bytes
      .toBytes("invocationrecord");
  private static final byte[] FAMILY_NAME_BYTES = Bytes.toBytes("mycf");

  private static final byte[] ROW_BYTES = Bytes.toBytes("row");
  private static final String QUALIFIER_PREFIX = "qualifier";
  private static final String VALUE_PREFIX = "value";

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private HRegion region;

  /**
   * Creates a single-family region and loads one row with puts at the even
   * qualifiers 0, 2, 4, 6 and 8 (timestamp == qualifier index), then flushes
   * so the scan in the test reads from a store file.
   */
  @Before
  public void setUp() throws Exception {
    HTableDescriptor htd = new HTableDescriptor(
        TableName.valueOf(TABLE_NAME_BYTES));
    htd.addFamily(new HColumnDescriptor(FAMILY_NAME_BYTES));
    HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
    this.region = HRegion.createHRegion(info, TEST_UTIL.getDataTestDir(),
        TEST_UTIL.getConfiguration(), htd);

    Put put = new Put(ROW_BYTES);
    for (int i = 0; i < 10; i += 2) {
      // puts 0, 2, 4, 6 and 8
      put.add(FAMILY_NAME_BYTES, Bytes.toBytes(QUALIFIER_PREFIX + i), i,
          Bytes.toBytes(VALUE_PREFIX + i));
    }
    this.region.put(put);
    this.region.flushcache();
  }

  /**
   * Verifies that the filter is invoked only for the explicitly selected
   * columns: selecting absent qualifiers (-1, 3, 5) must not surface any
   * extra cells, while selecting present qualifiers (0, 4, 8) must return
   * exactly those cells.
   */
  @Test
  public void testFilterInvocation() throws Exception {
    List<Integer> selectQualifiers = new ArrayList<Integer>();
    List<Integer> expectedQualifiers = new ArrayList<Integer>();

    selectQualifiers.add(-1);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));

    selectQualifiers.clear();

    selectQualifiers.add(0);
    expectedQualifiers.add(0);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));

    selectQualifiers.add(3);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));

    selectQualifiers.add(4);
    expectedQualifiers.add(4);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));

    selectQualifiers.add(5);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));

    selectQualifiers.add(8);
    expectedQualifiers.add(8);
    verifyInvocationResults(selectQualifiers.toArray(new Integer[0]),
        expectedQualifiers.toArray(new Integer[0]));
  }

  /**
   * Runs a Get (converted to a Scan) over the test row, selecting the given
   * qualifiers with an {@link InvocationRecordFilter} attached, and asserts
   * the scan returns exactly the cells for {@code expectedQualifiers}.
   *
   * @param selectQualifiers qualifier indices added to the Get
   * @param expectedQualifiers qualifier indices the scan must return
   * @throws Exception if the region scan fails
   */
  public void verifyInvocationResults(Integer[] selectQualifiers,
      Integer[] expectedQualifiers) throws Exception {
    Get get = new Get(ROW_BYTES);
    for (int i = 0; i < selectQualifiers.length; i++) {
      get.addColumn(FAMILY_NAME_BYTES,
          Bytes.toBytes(QUALIFIER_PREFIX + selectQualifiers[i]));
    }

    get.setFilter(new InvocationRecordFilter());

    List<KeyValue> expectedValues = new ArrayList<KeyValue>();
    for (int i = 0; i < expectedQualifiers.length; i++) {
      expectedValues.add(new KeyValue(ROW_BYTES, FAMILY_NAME_BYTES, Bytes
          .toBytes(QUALIFIER_PREFIX + expectedQualifiers[i]),
          expectedQualifiers[i], Bytes.toBytes(VALUE_PREFIX
              + expectedQualifiers[i])));
    }

    Scan scan = new Scan(get);
    List<Cell> actualValues = new ArrayList<Cell>();
    List<Cell> temp = new ArrayList<Cell>();
    InternalScanner scanner = this.region.getScanner(scan);
    try {
      // Drain the scanner; next() returns false once the last batch is in temp.
      while (scanner.next(temp)) {
        actualValues.addAll(temp);
        temp.clear();
      }
      actualValues.addAll(temp);
    } finally {
      // Always release the scanner, even when an assertion below would not be
      // reached; leaking it would pin store files until the region closes.
      scanner.close();
    }
    Assert.assertTrue("Actual values " + actualValues
        + " differ from the expected values:" + expectedValues,
        expectedValues.equals(actualValues));
  }

  @After
  public void tearDown() throws Exception {
    HLog hlog = region.getLog();
    region.close();
    hlog.closeAndDelete();
  }

  /**
   * Filter which gives the list of keyvalues for which the filter is invoked.
   */
  private static class InvocationRecordFilter extends FilterBase {

    // Cells seen by filterKeyValue since the last reset(); replayed into the
    // row by filterRowCells so the test observes exactly the invocation set.
    private List<Cell> visitedKeyValues = new ArrayList<Cell>();

    @Override
    public void reset() {
      visitedKeyValues.clear();
    }

    @Override
    public ReturnCode filterKeyValue(Cell ignored) {
      visitedKeyValues.add(ignored);
      return ReturnCode.INCLUDE;
    }

    @Override
    public void filterRowCells(List<Cell> kvs) {
      kvs.clear();
      kvs.addAll(visitedKeyValues);
    }

    @Override
    public boolean hasFilterRow() {
      return true;
    }
  }
}

View File

@ -55,8 +55,8 @@ public class TestExplicitColumnTracker extends HBaseTestCase {
long timestamp = 0; long timestamp = 0;
//"Match" //"Match"
for(byte [] col : scannerColumns){ for(byte [] col : scannerColumns){
result.add(exp.checkColumn(col, 0, col.length, ++timestamp, result.add(ScanQueryMatcher.checkColumn(exp, col, 0, col.length, ++timestamp,
KeyValue.Type.Put.getCode(), false)); KeyValue.Type.Put.getCode(), false));
} }
assertEquals(expected.size(), result.size()); assertEquals(expected.size(), result.size());
@ -168,15 +168,15 @@ public class TestExplicitColumnTracker extends HBaseTestCase {
Long.MIN_VALUE); Long.MIN_VALUE);
for (int i = 0; i < 100000; i+=2) { for (int i = 0; i < 100000; i+=2) {
byte [] col = Bytes.toBytes("col"+i); byte [] col = Bytes.toBytes("col"+i);
explicit.checkColumn(col, 0, col.length, 1, KeyValue.Type.Put.getCode(), ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
false); false);
} }
explicit.reset(); explicit.reset();
for (int i = 1; i < 100000; i+=2) { for (int i = 1; i < 100000; i+=2) {
byte [] col = Bytes.toBytes("col"+i); byte [] col = Bytes.toBytes("col"+i);
explicit.checkColumn(col, 0, col.length, 1, KeyValue.Type.Put.getCode(), ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
false); false);
} }
} }

View File

@ -54,8 +54,9 @@ public class TestScanWildcardColumnTracker extends HBaseTestCase {
List<ScanQueryMatcher.MatchCode> actual = new ArrayList<MatchCode>(); List<ScanQueryMatcher.MatchCode> actual = new ArrayList<MatchCode>();
for(byte [] qualifier : qualifiers) { for(byte [] qualifier : qualifiers) {
ScanQueryMatcher.MatchCode mc = tracker.checkColumn(qualifier, 0, ScanQueryMatcher.MatchCode mc =
qualifier.length, 1, KeyValue.Type.Put.getCode(), false); ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, 1,
KeyValue.Type.Put.getCode(), false);
actual.add(mc); actual.add(mc);
} }
@ -87,8 +88,9 @@ public class TestScanWildcardColumnTracker extends HBaseTestCase {
long timestamp = 0; long timestamp = 0;
for(byte [] qualifier : qualifiers) { for(byte [] qualifier : qualifiers) {
MatchCode mc = tracker.checkColumn(qualifier, 0, qualifier.length, MatchCode mc =
++timestamp, KeyValue.Type.Put.getCode(), false); ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, ++timestamp,
KeyValue.Type.Put.getCode(), false);
actual.add(mc); actual.add(mc);
} }
@ -111,8 +113,8 @@ public class TestScanWildcardColumnTracker extends HBaseTestCase {
try { try {
for(byte [] qualifier : qualifiers) { for(byte [] qualifier : qualifiers) {
tracker.checkColumn(qualifier, 0, qualifier.length, 1, ScanQueryMatcher.checkColumn(tracker, qualifier, 0, qualifier.length, 1,
KeyValue.Type.Put.getCode(), false); KeyValue.Type.Put.getCode(), false);
} }
} catch (Exception e) { } catch (Exception e) {
ok = true; ok = true;