HBASE-6200 KeyComparator.compareWithoutRow can be wrong when families have the same prefix (Jieshan)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1354290 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f4165e27ce
commit
275bfdcc40
|
@ -2413,33 +2413,29 @@ public class KeyValue implements Writable, HeapSize {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare column, timestamp, and key type (everything except the row).
|
* Compare columnFamily, qualifier, timestamp, and key type (everything
|
||||||
* This method is used both in the normal comparator and the "same-prefix"
|
* except the row). This method is used both in the normal comparator and
|
||||||
* comparator. Note that we are assuming that row portions of both KVs have
|
* the "same-prefix" comparator. Note that we are assuming that row portions
|
||||||
* already been parsed and found identical, and we don't validate that
|
* of both KVs have already been parsed and found identical, and we don't
|
||||||
* assumption here.
|
* validate that assumption here.
|
||||||
* @param commonPrefix the length of the common prefix of the two
|
* @param commonPrefix
|
||||||
* key-values being compared, including row length and row
|
* the length of the common prefix of the two key-values being
|
||||||
|
* compared, including row length and row
|
||||||
*/
|
*/
|
||||||
private int compareWithoutRow(int commonPrefix, byte[] left, int loffset,
|
private int compareWithoutRow(int commonPrefix, byte[] left, int loffset,
|
||||||
int llength, byte[] right, int roffset, int rlength, short rowlength) {
|
int llength, byte[] right, int roffset, int rlength, short rowlength) {
|
||||||
// Compare column family. Start comparing past row and family length.
|
/***
|
||||||
int lcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE +
|
* KeyValue Format and commonLength:
|
||||||
rowlength + loffset;
|
* |_keyLen_|_valLen_|_rowLen_|_rowKey_|_famiLen_|_fami_|_Quali_|....
|
||||||
int rcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE +
|
* ------------------|-------commonLength--------|--------------
|
||||||
rowlength + roffset;
|
*/
|
||||||
int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE -
|
int commonLength = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + rowlength;
|
||||||
(lcolumnoffset - loffset);
|
|
||||||
int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE -
|
|
||||||
(rcolumnoffset - roffset);
|
|
||||||
|
|
||||||
// If row matches, and no column in the 'left' AND put type is 'minimum',
|
// commonLength + TIMESTAMP_TYPE_SIZE
|
||||||
// then return that left is larger than right.
|
int commonLengthWithTSAndType = TIMESTAMP_TYPE_SIZE + commonLength;
|
||||||
|
// ColumnFamily + Qualifier length.
|
||||||
// This supports 'last key on a row' - the magic is if there is no column
|
int lcolumnlength = llength - commonLengthWithTSAndType;
|
||||||
// in the left operand, and the left operand has a type of '0' - magical
|
int rcolumnlength = rlength - commonLengthWithTSAndType;
|
||||||
// value, then we say the left is bigger. This will let us seek to the
|
|
||||||
// last key in a row.
|
|
||||||
|
|
||||||
byte ltype = left[loffset + (llength - 1)];
|
byte ltype = left[loffset + (llength - 1)];
|
||||||
byte rtype = right[roffset + (rlength - 1)];
|
byte rtype = right[roffset + (rlength - 1)];
|
||||||
|
@ -2447,7 +2443,7 @@ public class KeyValue implements Writable, HeapSize {
|
||||||
// If the column is not specified, the "minimum" key type appears the
|
// If the column is not specified, the "minimum" key type appears the
|
||||||
// latest in the sorted order, regardless of the timestamp. This is used
|
// latest in the sorted order, regardless of the timestamp. This is used
|
||||||
// for specifying the last key/value in a given row, because there is no
|
// for specifying the last key/value in a given row, because there is no
|
||||||
// "lexicographically last column" (it would be infinitely long). The
|
// "lexicographically last column" (it would be infinitely long). The
|
||||||
// "maximum" key type does not need this behavior.
|
// "maximum" key type does not need this behavior.
|
||||||
if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) {
|
if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) {
|
||||||
// left is "bigger", i.e. it appears later in the sorted order
|
// left is "bigger", i.e. it appears later in the sorted order
|
||||||
|
@ -2457,20 +2453,38 @@ public class KeyValue implements Writable, HeapSize {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int lfamilyoffset = commonLength + loffset;
|
||||||
|
int rfamilyoffset = commonLength + roffset;
|
||||||
|
|
||||||
|
// Column family length.
|
||||||
|
int lfamilylength = left[lfamilyoffset - 1];
|
||||||
|
int rfamilylength = right[rfamilyoffset - 1];
|
||||||
|
// If left family size is not equal to right family size, we need not
|
||||||
|
// compare the qualifiers.
|
||||||
|
boolean sameFamilySize = (lfamilylength == rfamilylength);
|
||||||
int common = 0;
|
int common = 0;
|
||||||
if (commonPrefix > 0) {
|
if (commonPrefix > 0) {
|
||||||
common = Math.max(0, commonPrefix -
|
common = Math.max(0, commonPrefix - commonLength);
|
||||||
rowlength - ROW_LENGTH_SIZE - FAMILY_LENGTH_SIZE);
|
if (!sameFamilySize) {
|
||||||
common = Math.min(common, Math.min(lcolumnlength, rcolumnlength));
|
// Common should not be larger than Math.min(lfamilylength,
|
||||||
|
// rfamilylength).
|
||||||
|
common = Math.min(common, Math.min(lfamilylength, rfamilylength));
|
||||||
|
} else {
|
||||||
|
common = Math.min(common, Math.min(lcolumnlength, rcolumnlength));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (!sameFamilySize) {
|
||||||
final int comparisonResult = Bytes.compareTo(
|
// comparing column family is enough.
|
||||||
left, lcolumnoffset + common, lcolumnlength - common,
|
return Bytes.compareTo(left, lfamilyoffset + common, lfamilylength
|
||||||
right, rcolumnoffset + common, rcolumnlength - common);
|
- common, right, rfamilyoffset + common, rfamilylength - common);
|
||||||
if (comparisonResult != 0) {
|
}
|
||||||
return comparisonResult;
|
// Compare family & qualifier together.
|
||||||
|
final int comparison = Bytes.compareTo(left, lfamilyoffset + common,
|
||||||
|
lcolumnlength - common, right, rfamilyoffset + common,
|
||||||
|
rcolumnlength - common);
|
||||||
|
if (comparison != 0) {
|
||||||
|
return comparison;
|
||||||
}
|
}
|
||||||
|
|
||||||
return compareTimestampAndType(left, loffset, llength, right, roffset,
|
return compareTimestampAndType(left, loffset, llength, right, roffset,
|
||||||
rlength, ltype, rtype);
|
rlength, ltype, rtype);
|
||||||
}
|
}
|
||||||
|
|
|
@ -341,6 +341,65 @@ public class TestKeyValue extends TestCase {
|
||||||
assertTrue(cmp > 0);
|
assertTrue(cmp > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void assertKVLessWithoutRow(KeyValue.KeyComparator c, int common, KeyValue less,
|
||||||
|
KeyValue greater) {
|
||||||
|
int cmp = c.compareIgnoringPrefix(common, less.getBuffer(), less.getOffset()
|
||||||
|
+ KeyValue.ROW_OFFSET, less.getKeyLength(), greater.getBuffer(),
|
||||||
|
greater.getOffset() + KeyValue.ROW_OFFSET, greater.getKeyLength());
|
||||||
|
assertTrue(cmp < 0);
|
||||||
|
cmp = c.compareIgnoringPrefix(common, greater.getBuffer(), greater.getOffset()
|
||||||
|
+ KeyValue.ROW_OFFSET, greater.getKeyLength(), less.getBuffer(),
|
||||||
|
less.getOffset() + KeyValue.ROW_OFFSET, less.getKeyLength());
|
||||||
|
assertTrue(cmp > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCompareWithoutRow() {
|
||||||
|
final KeyValue.KeyComparator c = KeyValue.KEY_COMPARATOR;
|
||||||
|
byte[] row = Bytes.toBytes("row");
|
||||||
|
|
||||||
|
byte[] fa = Bytes.toBytes("fa");
|
||||||
|
byte[] fami = Bytes.toBytes("fami");
|
||||||
|
byte[] fami1 = Bytes.toBytes("fami1");
|
||||||
|
|
||||||
|
byte[] qual0 = Bytes.toBytes("");
|
||||||
|
byte[] qual1 = Bytes.toBytes("qf1");
|
||||||
|
byte[] qual2 = Bytes.toBytes("qf2");
|
||||||
|
long ts = 1;
|
||||||
|
|
||||||
|
// 'fa:'
|
||||||
|
KeyValue kv_0 = new KeyValue(row, fa, qual0, ts, Type.Put);
|
||||||
|
// 'fami:'
|
||||||
|
KeyValue kv0_0 = new KeyValue(row, fami, qual0, ts, Type.Put);
|
||||||
|
// 'fami:qf1'
|
||||||
|
KeyValue kv0_1 = new KeyValue(row, fami, qual1, ts, Type.Put);
|
||||||
|
// 'fami:qf2'
|
||||||
|
KeyValue kv0_2 = new KeyValue(row, fami, qual2, ts, Type.Put);
|
||||||
|
// 'fami1:'
|
||||||
|
KeyValue kv1_0 = new KeyValue(row, fami1, qual0, ts, Type.Put);
|
||||||
|
|
||||||
|
// 'fami:qf1' < 'fami:qf2'
|
||||||
|
assertKVLessWithoutRow(c, 0, kv0_1, kv0_2);
|
||||||
|
// 'fami:qf1' < 'fami1:'
|
||||||
|
assertKVLessWithoutRow(c, 0, kv0_1, kv1_0);
|
||||||
|
|
||||||
|
// Test comparison by skipping the same prefix bytes.
|
||||||
|
/***
|
||||||
|
* KeyValue Format and commonLength:
|
||||||
|
* |_keyLen_|_valLen_|_rowLen_|_rowKey_|_famiLen_|_fami_|_Quali_|....
|
||||||
|
* ------------------|-------commonLength--------|--------------
|
||||||
|
*/
|
||||||
|
int commonLength = KeyValue.ROW_LENGTH_SIZE + KeyValue.FAMILY_LENGTH_SIZE
|
||||||
|
+ row.length;
|
||||||
|
// 'fa:' < 'fami:'. They have commonPrefix + 2 same prefix bytes.
|
||||||
|
assertKVLessWithoutRow(c, commonLength + 2, kv_0, kv0_0);
|
||||||
|
// 'fami:' < 'fami:qf1'. They have commonPrefix + 4 same prefix bytes.
|
||||||
|
assertKVLessWithoutRow(c, commonLength + 4, kv0_0, kv0_1);
|
||||||
|
// 'fami:qf1' < 'fami1:'. They have commonPrefix + 4 same prefix bytes.
|
||||||
|
assertKVLessWithoutRow(c, commonLength + 4, kv0_1, kv1_0);
|
||||||
|
// 'fami:qf1' < 'fami:qf2'. They have commonPrefix + 6 same prefix bytes.
|
||||||
|
assertKVLessWithoutRow(c, commonLength + 6, kv0_1, kv0_2);
|
||||||
|
}
|
||||||
|
|
||||||
public void testFirstLastOnRow() {
|
public void testFirstLastOnRow() {
|
||||||
final KVComparator c = KeyValue.COMPARATOR;
|
final KVComparator c = KeyValue.COMPARATOR;
|
||||||
long ts = 1;
|
long ts = 1;
|
||||||
|
|
Loading…
Reference in New Issue