diff --git a/CHANGES.txt b/CHANGES.txt index b27b8de9efb..38207fff9e5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -981,6 +981,9 @@ Release 0.21.0 - Unreleased HBASE-410 [testing] Speed up the test suite HBASE-2041 Change WAL default configuration values HBASE-2997 Performance fixes - profiler driven + HBASE-2450 For single row reads of specific columns, seek to the + first column in HFiles rather than start of row + (Pranav via Ryan, some Ryan) Release 0.20.0 - Tue Sep 8 12:53:05 PDT 2009 diff --git a/src/main/java/org/apache/hadoop/hbase/HConstants.java b/src/main/java/org/apache/hadoop/hbase/HConstants.java index a6692903fe1..f565fa126fc 100644 --- a/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -259,6 +259,11 @@ public final class HConstants { */ public static final long LATEST_TIMESTAMP = Long.MAX_VALUE; + /** + * Timestamp to use when we want to refer to the oldest cell. + */ + public static final long OLDEST_TIMESTAMP = Long.MIN_VALUE; + /** * LATEST_TIMESTAMP in bytes form */ diff --git a/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/src/main/java/org/apache/hadoop/hbase/KeyValue.java index bb26f2784d9..14c5c8c4861 100644 --- a/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -1649,6 +1649,31 @@ public class KeyValue implements Writable, HeapSize { HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0); } + /** + * Create a KeyValue for the specified row, family and qualifier that would be + * larger than or equal to all other possible KeyValues that have the same + * row, family, qualifier. + * Used for reseeking. 
+ * @param row row key + * @param roffset row offset + * @param rlength row length + * @param family family name + * @param foffset family offset + * @param flength family length + * @param qualifier column qualifier + * @param qoffset qualifier offset + * @param qlength qualifier length + * @return Last possible key on passed row, family, qualifier. + */ + public static KeyValue createLastOnRow(final byte [] row, + final int roffset, final int rlength, final byte [] family, + final int foffset, final int flength, final byte [] qualifier, + final int qoffset, final int qlength) { + return new KeyValue(row, roffset, rlength, family, + foffset, flength, qualifier, qoffset, qlength, + HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0); + } + /** * @param b * @return A KeyValue made of a byte array that holds the key-only part. diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java b/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java index 3a78bd472d2..5657ab1d181 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java @@ -439,7 +439,7 @@ public class MemStore implements HeapSize { // to be extra safe we only remove Puts that have a memstoreTS==0 if (kv.getType() == KeyValue.Type.Put.getCode()) { // false means there was a change, so give us the size. 
- addedSize -= heapSizeChange(kv, false); + addedSize -= heapSizeChange(kv, true); it.remove(); } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java b/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java index e4f58050897..0f42e3a765d 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java @@ -159,16 +159,17 @@ public class ScanQueryMatcher { this.deletes.add(bytes, offset, qualLength, timestamp, type); // Can't early out now, because DelFam come before any other keys } - // May be able to optimize the SKIP here, if we matched - // due to a DelFam, we can skip to next row - // due to a DelCol, we can skip to next col - // But it requires more info out of isDelete(). - // needful -> million column challenge. return MatchCode.SKIP; } if (!this.deletes.isEmpty() && deletes.isDeleted(bytes, offset, qualLength, timestamp)) { + + // May be able to optimize the SKIP here, if we matched + // due to a DelFam, we can skip to next row + // due to a DelCol, we can skip to next col + // But it requires more info out of isDelete(). + // needful -> million column challenge. return MatchCode.SKIP; } @@ -233,6 +234,8 @@ public class ScanQueryMatcher { if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) && rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(), kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) { + // KV >= STOPROW + // then NO there is nothing left. 
return false; } else { return true; @@ -280,6 +283,28 @@ public class ScanQueryMatcher { } } + public KeyValue getKeyForNextColumn(KeyValue kv) { + ColumnCount nextColumn = columns.getColumnHint(); + if (nextColumn == null) { + return KeyValue.createLastOnRow( + kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), + kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(), + kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength()); + } else { + return KeyValue.createFirstOnRow( + kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), + kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(), + nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength()); + } + } + + public KeyValue getKeyForNextRow(KeyValue kv) { + return KeyValue.createLastOnRow( + kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(), + null, 0, 0, + null, 0, 0); + } + /** * {@link #match} return codes. These instruct the scanner moving through * memstores and StoreFiles what to do with the current KeyValue. diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index dac9635c29d..4775fc86454 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -68,7 +68,8 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb // pass columns = try to filter out unnecessary ScanFiles List scanners = getScanners(scan, columns); - // Seek all scanners to the initial key + // Seek all scanners to the start of the Row (or if the exact matching row key does not + // exist, then to the start of the next matching Row). 
for(KeyValueScanner scanner : scanners) { scanner.seek(matcher.getStartKey()); } @@ -261,18 +262,18 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb return false; case SEEK_NEXT_ROW: + // This is a relatively simple end-of-scan fix: short-circuit the scan if there is an + // endKey in the scan. if (!matcher.moreRowsMayExistAfter(kv)) { outResult.addAll(results); return false; } - heap.next(); + + reseek(matcher.getKeyForNextRow(kv)); break; case SEEK_NEXT_COL: - // TODO hfile needs 'hinted' seeking to prevent it from - // reseeking from the start of the block on every dang seek. - // We need that API and expose it the scanner chain. - heap.next(); + reseek(matcher.getKeyForNextColumn(kv)); break; case SKIP: