HBASE-2450 For single row reads of specific columns, seek to the first column in HFiles rather than start of row (Pranav via Ryan, some Ryan)
HBASE-2916 Reseek directly to next column; HBASE-2959 Scanning always starts at the beginning of a row. git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1000276 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2806d60cab
commit
0f3c62e19b
|
@ -981,6 +981,9 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-410 [testing] Speed up the test suite
|
||||
HBASE-2041 Change WAL default configuration values
|
||||
HBASE-2997 Performance fixes - profiler driven
|
||||
HBASE-2450 For single row reads of specific columns, seek to the
|
||||
first column in HFiles rather than start of row
|
||||
(Pranav via Ryan, some Ryan)
|
||||
|
||||
|
||||
Release 0.20.0 - Tue Sep 8 12:53:05 PDT 2009
|
||||
|
|
|
@ -259,6 +259,11 @@ public final class HConstants {
|
|||
*/
|
||||
public static final long LATEST_TIMESTAMP = Long.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Timestamp to use when we want to refer to the oldest cell.
|
||||
*/
|
||||
public static final long OLDEST_TIMESTAMP = Long.MIN_VALUE;
|
||||
|
||||
/**
|
||||
* LATEST_TIMESTAMP in bytes form
|
||||
*/
|
||||
|
|
|
@ -1649,6 +1649,31 @@ public class KeyValue implements Writable, HeapSize {
|
|||
HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a KeyValue for the specified row, family and qualifier that would be
|
||||
* larger than or equal to all other possible KeyValues that have the same
|
||||
* row, family, qualifier.
|
||||
* Used for reseeking.
|
||||
* @param row row key
|
||||
* @param roffset row offset
|
||||
* @param rlength row length
|
||||
* @param family family name
|
||||
* @param foffset family offset
|
||||
* @param flength family length
|
||||
* @param qualifier column qualifier
|
||||
* @param qoffset qualifier offset
|
||||
* @param qlength qualifier length
|
||||
* @return Last possible key on passed row, family, qualifier.
|
||||
*/
|
||||
public static KeyValue createLastOnRow(final byte [] row,
|
||||
final int roffset, final int rlength, final byte [] family,
|
||||
final int foffset, final int flength, final byte [] qualifier,
|
||||
final int qoffset, final int qlength) {
|
||||
return new KeyValue(row, roffset, rlength, family,
|
||||
foffset, flength, qualifier, qoffset, qlength,
|
||||
HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param b
|
||||
* @return A KeyValue made of a byte array that holds the key-only part.
|
||||
|
|
|
@ -439,7 +439,7 @@ public class MemStore implements HeapSize {
|
|||
// to be extra safe we only remove Puts that have a memstoreTS==0
|
||||
if (kv.getType() == KeyValue.Type.Put.getCode()) {
|
||||
// false means there was a change, so give us the size.
|
||||
addedSize -= heapSizeChange(kv, false);
|
||||
addedSize -= heapSizeChange(kv, true);
|
||||
|
||||
it.remove();
|
||||
}
|
||||
|
|
|
@ -159,16 +159,17 @@ public class ScanQueryMatcher {
|
|||
this.deletes.add(bytes, offset, qualLength, timestamp, type);
|
||||
// Can't early out now, because DelFam come before any other keys
|
||||
}
|
||||
// May be able to optimize the SKIP here, if we matched
|
||||
// due to a DelFam, we can skip to next row
|
||||
// due to a DelCol, we can skip to next col
|
||||
// But it requires more info out of isDelete().
|
||||
// needful -> million column challenge.
|
||||
return MatchCode.SKIP;
|
||||
}
|
||||
|
||||
if (!this.deletes.isEmpty() &&
|
||||
deletes.isDeleted(bytes, offset, qualLength, timestamp)) {
|
||||
|
||||
// May be able to optimize the SKIP here, if we matched
|
||||
// due to a DelFam, we can skip to next row
|
||||
// due to a DelCol, we can skip to next col
|
||||
// But it requires more info out of isDelete().
|
||||
// needful -> million column challenge.
|
||||
return MatchCode.SKIP;
|
||||
}
|
||||
|
||||
|
@ -233,6 +234,8 @@ public class ScanQueryMatcher {
|
|||
if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
|
||||
rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
|
||||
kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
|
||||
// KV >= STOPROW
|
||||
// then NO there is nothing left.
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
|
@ -280,6 +283,28 @@ public class ScanQueryMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
public KeyValue getKeyForNextColumn(KeyValue kv) {
|
||||
ColumnCount nextColumn = columns.getColumnHint();
|
||||
if (nextColumn == null) {
|
||||
return KeyValue.createLastOnRow(
|
||||
kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
|
||||
kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
|
||||
kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
|
||||
} else {
|
||||
return KeyValue.createFirstOnRow(
|
||||
kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
|
||||
kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
|
||||
nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
|
||||
}
|
||||
}
|
||||
|
||||
public KeyValue getKeyForNextRow(KeyValue kv) {
|
||||
return KeyValue.createLastOnRow(
|
||||
kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
|
||||
null, 0, 0,
|
||||
null, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link #match} return codes. These instruct the scanner moving through
|
||||
* memstores and StoreFiles what to do with the current KeyValue.
|
||||
|
|
|
@ -68,7 +68,8 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb
|
|||
// pass columns = try to filter out unnecessary ScanFiles
|
||||
List<KeyValueScanner> scanners = getScanners(scan, columns);
|
||||
|
||||
// Seek all scanners to the initial key
|
||||
// Seek all scanners to the start of the Row (or if the exact matching row key does not
|
||||
// exist, then to the start of the next matching Row).
|
||||
for(KeyValueScanner scanner : scanners) {
|
||||
scanner.seek(matcher.getStartKey());
|
||||
}
|
||||
|
@ -261,18 +262,18 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb
|
|||
return false;
|
||||
|
||||
case SEEK_NEXT_ROW:
|
||||
// This is a relatively simple end-of-scan fix: short-circuit the scan here if the scan
|
||||
// specifies an endKey.
|
||||
if (!matcher.moreRowsMayExistAfter(kv)) {
|
||||
outResult.addAll(results);
|
||||
return false;
|
||||
}
|
||||
heap.next();
|
||||
|
||||
reseek(matcher.getKeyForNextRow(kv));
|
||||
break;
|
||||
|
||||
case SEEK_NEXT_COL:
|
||||
// TODO hfile needs 'hinted' seeking to prevent it from
|
||||
// reseeking from the start of the block on every dang seek.
|
||||
// We need that API, and need to expose it to the scanner chain.
|
||||
heap.next();
|
||||
reseek(matcher.getKeyForNextColumn(kv));
|
||||
break;
|
||||
|
||||
case SKIP:
|
||||
|
|
Loading…
Reference in New Issue