HBASE-12069 Finish making HFile.Writer Cell-centric; undo APIs that expect KV serializations

stack 2014-09-25 10:49:09 -07:00
parent 44a27c5cd7
commit 31ed817447
11 changed files with 118 additions and 143 deletions
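In short, HFile.Writer callers now hand the writer Cells instead of raw key/value byte arrays. Below is a minimal before/after sketch of the migration, assuming an HBase build at this commit; the path, context, and class name are illustrative, modeled on the CompressionTest change further down.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.Bytes;

public class CellCentricAppendExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.hfile"); // illustrative path
    HFileContext context = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withFileContext(context)
        .create();
    try {
      // Before this commit a caller could hand over raw serializations:
      //   writer.append(Bytes.toBytes("testkey"), Bytes.toBytes("testval"));
      // Now the caller builds a Cell and appends that. Cells must still be
      // appended in the order the writer's comparator expects.
      Cell c = CellUtil.createCell(Bytes.toBytes("testkey"), Bytes.toBytes("testval"));
      writer.append(c);
    } finally {
      writer.close();
    }
  }
}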

CellUtil.java (View File)

@@ -188,6 +188,26 @@ public final class CellUtil {
return keyValue;
}
/**
* Create a Cell with a specific row. Other fields are arbitrary choices.
* @param row The row to put in the Cell.
* @return Cell with passed row but all other fields are arbitrary
*/
public static Cell createCell(final byte [] row) {
return createCell(row, HConstants.EMPTY_BYTE_ARRAY);
}
/**
* Create a Cell with a specific row and value. Other fields are arbitrary choices.
* @param row The row to put in the Cell.
* @param value The value to put in the Cell.
* @return Cell with passed row and value but all other fields are arbitrary
*/
public static Cell createCell(final byte [] row, final byte [] value) {
return createCell(row, HConstants.CATALOG_FAMILY, HConstants.SERVERNAME_QUALIFIER,
HConstants.LATEST_TIMESTAMP, (byte)0, value);
}
/**
* @param cellScannerables The CellScannables to provide Cells from.
* @return CellScanner interface over <code>cellScannerables</code>
@@ -684,4 +704,4 @@ public final class CellUtil {
}
return commonPrefix;
}
}
}
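The new createCell overloads above are aimed at tests that only care about the row. A small usage sketch (class name illustrative); everything but the row comes from the arbitrary defaults hard-coded above: catalog family, servername qualifier, latest timestamp, and, for the one-argument form, an empty value.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.util.Bytes;

public class CreateCellExample {
  public static void main(String[] args) {
    // Only the row is ours; the other coordinates are placeholders.
    Cell c = CellUtil.createCell(Bytes.toBytes("row1"));
    System.out.println(Bytes.toString(CellUtil.cloneRow(c))); // prints "row1"
  }
}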

HFile.java (View File)

@@ -202,10 +202,6 @@ public class HFile {
void append(Cell cell) throws IOException;
void append(byte[] key, byte[] value) throws IOException;
void append (byte[] key, byte[] value, byte[] tag) throws IOException;
/** @return the path to this {@link HFile} */
Path getPath();

HFileScanner.java (View File)

@@ -60,7 +60,7 @@ public interface HFileScanner {
@Deprecated
int seekTo(byte[] key, int offset, int length) throws IOException;
int seekTo(Cell kv) throws IOException;
int seekTo(Cell c) throws IOException;
/**
* Reseek to or just before the passed <code>key</code>. Similar to seekTo
* except that this can be called even if the scanner is not at the beginning
@@ -86,7 +86,7 @@ public interface HFileScanner {
@Deprecated
int reseekTo(byte[] key, int offset, int length) throws IOException;
int reseekTo(Cell kv) throws IOException;
int reseekTo(Cell c) throws IOException;
/**
* Consider the key stream of all the keys in the file,
* <code>k[0] .. k[n]</code>, where there are n keys in the file.
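The Cell-taking seekTo is what callers in this patch migrate to. A hedged sketch of the seek contract (class and method names illustrative), assuming a reader opened as in the tests below; the -1/0/1 return semantics are those documented on HFileScanner.

import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class SeekExample {
  static void seekDemo(HFile.Reader reader) throws IOException {
    HFileScanner scanner = reader.getScanner(false, true);
    Cell probe = CellUtil.createCell(Bytes.toBytes("testkey"));
    int where = scanner.seekTo(probe);
    if (where == -1) {
      System.out.println("probe sorts before the first key; scanner unpositioned");
    } else if (where == 0) {
      System.out.println("exact match: " + scanner.getKeyValue());
    } else {
      // Scanner sits on the greatest key sorting before the probe.
      System.out.println("landed just before probe: " + scanner.getKeyValue());
    }
  }
}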

HFileWriterV2.java (View File)

@@ -33,7 +33,6 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
@@ -244,8 +243,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
*
* @param cell
* Cell to add. Cannot be empty nor null.
* @param cell Cell to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
@@ -254,6 +252,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
byte[] value = cell.getValueArray();
int voffset = cell.getValueOffset();
int vlength = cell.getValueLength();
// checkKey uses comparator to check we are writing in order.
boolean dupKey = checkKey(cell);
checkValue(value, voffset, vlength);
if (!dupKey) {
@@ -270,7 +269,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
// Are we the first key in this block?
if (firstKeyInBlock == null) {
// Copy the key.
// Copy the key for use as first key in block. It is put into file index.
firstKeyInBlock = new byte[klength];
KeyValueUtil.appendKeyTo(cell, firstKeyInBlock, 0);
}
@@ -281,28 +280,6 @@ public class HFileWriterV2 extends AbstractHFileWriter {
this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId());
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
*
* @param key
* Key to add. Cannot be empty nor null.
* @param value
* Value to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
public void append(final byte[] key, final byte[] value) throws IOException {
int kvlen = (int) KeyValue.getKeyValueDataStructureSize(key.length, value.length, 0);
byte[] b = new byte[kvlen];
int pos = 0;
pos = Bytes.putInt(b, pos, key.length);
pos = Bytes.putInt(b, pos, value.length);
pos = Bytes.putBytes(b, pos, key, 0, key.length);
Bytes.putBytes(b, pos, value, 0, value.length);
append(new KeyValue(b, 0, kvlen));
}
@Override
public void close() throws IOException {
if (outputStream == null) {
@@ -426,11 +403,6 @@ public class HFileWriterV2 extends AbstractHFileWriter {
});
}
@Override
public void append(byte[] key, byte[] value, byte[] tag) throws IOException {
throw new UnsupportedOperationException("KV tags are supported only from HFile V3");
}
protected int getMajorVersion() {
return 2;
}
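A caller that still holds a flat key serialization plus a value can do on its own side what the deleted method did internally. A sketch under that assumption (class and method names illustrative); the byte layout is copied from the removed code above.

import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.Bytes;

public class FlatAppendShim {
  /** Caller-side stand-in for the removed append(byte[], byte[]). */
  static void append(HFile.Writer writer, byte[] key, byte[] value) throws IOException {
    // Layout: <4-byte key length><4-byte value length><key bytes><value bytes>
    int kvlen = (int) KeyValue.getKeyValueDataStructureSize(key.length, value.length, 0);
    byte[] b = new byte[kvlen];
    int pos = 0;
    pos = Bytes.putInt(b, pos, key.length);
    pos = Bytes.putInt(b, pos, value.length);
    pos = Bytes.putBytes(b, pos, key, 0, key.length);
    Bytes.putBytes(b, pos, value, 0, value.length);
    writer.append(new KeyValue(b, 0, kvlen)); // KeyValue implements Cell
  }
}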

HFileWriterV3.java (View File)

@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -91,47 +90,6 @@ public class HFileWriterV3 extends HFileWriterV2 {
this.maxTagsLength = tagsLength;
}
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
* @param key
* Key to add. Cannot be empty nor null.
* @param value
* Value to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
public void append(final byte[] key, final byte[] value) throws IOException {
append(key, value, HConstants.EMPTY_BYTE_ARRAY);
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
* @param key
* Key to add. Cannot be empty nor null.
* @param value
* Value to add. Cannot be empty nor null.
* @param tag
* Tag to add. Cannot be empty or null.
* @throws IOException
*/
@Override
public void append(final byte[] key, final byte[] value, byte[] tag) throws IOException {
int kvlen = (int) KeyValue.getKeyValueDataStructureSize(key.length, value.length, tag.length);
byte[] b = new byte[kvlen];
int pos = 0;
pos = Bytes.putInt(b, pos, key.length);
pos = Bytes.putInt(b, pos, value.length);
pos = Bytes.putBytes(b, pos, key, 0, key.length);
pos = Bytes.putBytes(b, pos, value, 0, value.length);
if (tag.length > 0) {
pos = Bytes.putAsShort(b, pos, tag.length);
Bytes.putBytes(b, pos, tag, 0, tag.length);
}
append(new KeyValue(b, 0, kvlen));
}
protected void finishFileInfo() throws IOException {
super.finishFileInfo();
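The same shim idea covers the removed V3 variant with tags; the tag layout below is copied from the removed method above. A sketch assuming tags are in play, which only HFile V3 supports (names illustrative).

import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.Bytes;

public class FlatTaggedAppendShim {
  /** Caller-side stand-in for the removed append(byte[], byte[], byte[]). */
  static void append(HFile.Writer writer, byte[] key, byte[] value, byte[] tag)
      throws IOException {
    // Layout: <key len><value len><key><value>[<2-byte tags len><tag bytes>]
    int kvlen = (int) KeyValue.getKeyValueDataStructureSize(key.length, value.length, tag.length);
    byte[] b = new byte[kvlen];
    int pos = 0;
    pos = Bytes.putInt(b, pos, key.length);
    pos = Bytes.putInt(b, pos, value.length);
    pos = Bytes.putBytes(b, pos, key, 0, key.length);
    pos = Bytes.putBytes(b, pos, value, 0, value.length);
    if (tag.length > 0) {
      pos = Bytes.putAsShort(b, pos, tag.length);
      Bytes.putBytes(b, pos, tag, 0, tag.length);
    }
    writer.append(new KeyValue(b, 0, kvlen));
  }
}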

CompressionTest.java (View File)

@@ -28,13 +28,18 @@ import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.io.compress.Compressor;
/**
@@ -119,19 +124,25 @@ public class CompressionTest {
.withPath(fs, path)
.withFileContext(context)
.create();
writer.append(Bytes.toBytes("testkey"), Bytes.toBytes("testval"));
writer.appendFileInfo(Bytes.toBytes("infokey"), Bytes.toBytes("infoval"));
// Write any-old Cell...
final byte [] rowKey = Bytes.toBytes("compressiontestkey");
Cell c = CellUtil.createCell(rowKey, Bytes.toBytes("compressiontestval"));
writer.append(c);
writer.appendFileInfo(Bytes.toBytes("compressioninfokey"), Bytes.toBytes("compressioninfoval"));
writer.close();
Cell cc = null;
HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), conf);
reader.loadFileInfo();
byte[] key = reader.getFirstKey();
boolean rc = Bytes.toString(key).equals("testkey");
reader.close();
if (!rc) {
throw new Exception("Read back incorrect result: " +
Bytes.toStringBinary(key));
try {
reader.loadFileInfo();
HFileScanner scanner = reader.getScanner(false, true);
scanner.next();
// Scanner does not return Cells yet; use getKeyValue() below until it is fixed.
cc = scanner.getKeyValue();
if (CellComparator.compareRows(c, cc) != 0) {
throw new Exception("Read back incorrect result: " + c.toString() + " vs " + cc.toString());
}
} finally {
reader.close();
}
}
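The rewritten smoke test can still be driven programmatically. A hedged sketch, assuming doSmokeTest keeps its public static (FileSystem, Path, String) signature; the path and codec here are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.CompressionTest;

public class CompressionSmokeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Writes one Cell with the "gz" codec, reads it back, and throws
    // if the row read differs from the row written.
    CompressionTest.doSmokeTest(fs, new Path("/tmp/compressiontest.hfile"), "gz");
  }
}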

HFilePerformanceEvaluation.java (View File)

@@ -19,7 +19,6 @@
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Random;
import org.apache.commons.logging.Log;
@@ -38,12 +37,9 @@ import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;
/**
* <p>
* This class runs performance benchmarks for {@link HFile}.
* </p>
*/
public class HFilePerformanceEvaluation {
private static final int ROW_LENGTH = 10;
private static final int ROW_COUNT = 1000000;
private static final int RFILE_BLOCKSIZE = 8 * 1024;
@@ -61,6 +57,29 @@
return w;
}
static Cell createCell(final int i) {
return createCell(i, HConstants.EMPTY_BYTE_ARRAY);
}
/**
* HFile is now Cell-based; it used to be byte array-based. This test therefore passes
* Cells. All Cells intentionally share the same coordinates in every field but the row.
* @param i Integer to format as a row key.
* @param value Value to use.
* @return Created Cell.
*/
static Cell createCell(final int i, final byte [] value) {
return createCell(format(i), value);
}
static Cell createCell(final byte [] keyRow) {
return createCell(keyRow, HConstants.EMPTY_BYTE_ARRAY);
}
static Cell createCell(final byte [] keyRow, final byte [] value) {
return CellUtil.createCell(keyRow, value);
}
private void runBenchmarks() throws Exception {
final Configuration conf = new Configuration();
final FileSystem fs = FileSystem.get(conf);
@@ -200,7 +219,7 @@
@Override
void doRow(int i) throws Exception {
writer.append(format(i), generateValue());
writer.append(createCell(i, generateValue()));
}
private byte[] generateValue() {
@@ -260,10 +279,10 @@
@Override
void doRow(int i) throws Exception {
if (this.scanner.next()) {
ByteBuffer k = this.scanner.getKey();
PerformanceEvaluationCommons.assertKey(format(i + 1), k);
ByteBuffer v = scanner.getValue();
PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
// TODO: Fix. Make Scanner do Cells.
Cell c = this.scanner.getKeyValue();
PerformanceEvaluationCommons.assertKey(format(i + 1), c);
PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
}
}
@@ -287,14 +306,14 @@
void doRow(int i) throws Exception {
HFileScanner scanner = this.reader.getScanner(false, true);
byte [] b = getRandomRow();
if (scanner.seekTo(b) < 0) {
if (scanner.seekTo(createCell(b)) < 0) {
LOG.info("Not able to seekTo " + new String(b));
return;
}
ByteBuffer k = scanner.getKey();
PerformanceEvaluationCommons.assertKey(b, k);
ByteBuffer v = scanner.getValue();
PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
// TODO: Fix scanner so it does Cells
Cell c = scanner.getKeyValue();
PerformanceEvaluationCommons.assertKey(b, c);
PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
}
private byte [] getRandomRow() {
@@ -314,20 +333,24 @@
void doRow(int i) throws Exception {
HFileScanner scanner = this.reader.getScanner(false, false);
byte [] b = getRandomRow();
if (scanner.seekTo(b) != 0) {
// System.out.println("Random row: " + new String(b));
Cell c = createCell(b);
if (scanner.seekTo(c) != 0) {
LOG.info("Nonexistent row: " + new String(b));
return;
}
ByteBuffer k = scanner.getKey();
PerformanceEvaluationCommons.assertKey(b, k);
// System.out.println("Found row: " + new String(b));
// TODO: HFileScanner doesn't do Cells yet. Temporary fix.
c = scanner.getKeyValue();
// System.out.println("Found row: " +
// new String(c.getRowArray(), c.getRowOffset(), c.getRowLength()));
PerformanceEvaluationCommons.assertKey(b, c);
for (int ii = 0; ii < 30; ii++) {
if (!scanner.next()) {
LOG.info("NOTHING FOLLOWS");
return;
}
ByteBuffer v = scanner.getValue();
PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
c = scanner.getKeyValue();
PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
}
}
@@ -349,14 +372,14 @@
void doRow(int i) throws Exception {
HFileScanner scanner = this.reader.getScanner(false, true);
byte[] gaussianRandomRowBytes = getGaussianRandomRowBytes();
scanner.seekTo(gaussianRandomRowBytes);
scanner.seekTo(createCell(gaussianRandomRowBytes));
for (int ii = 0; ii < 30; ii++) {
if (!scanner.next()) {
LOG.info("NOTHING FOLLOWS");
return;
}
scanner.getKey();
scanner.getValue();
// TODO: Fix. Make scanner do Cells.
scanner.getKeyValue();
}
}

PerformanceEvaluationCommons.java (View File)

@@ -45,11 +45,22 @@ public class PerformanceEvaluationCommons {
assertKey(expected, b);
}
public static void assertKey(final byte [] expected, final Cell c) {
assertKey(expected, c.getRowArray(), c.getRowOffset(), c.getRowLength());
}
public static void assertKey(final byte [] expected, final byte [] got) {
if (!org.apache.hadoop.hbase.util.Bytes.equals(expected, got)) {
assertKey(expected, got, 0, got.length);
}
public static void assertKey(final byte [] expected, final byte [] gotArray,
final int gotArrayOffset, final int gotArrayLength) {
if (!org.apache.hadoop.hbase.util.Bytes.equals(expected, 0, expected.length,
gotArray, gotArrayOffset, gotArrayLength)) {
throw new AssertionError("Expected " +
org.apache.hadoop.hbase.util.Bytes.toString(expected) +
" but got " + org.apache.hadoop.hbase.util.Bytes.toString(got));
" but got " +
org.apache.hadoop.hbase.util.Bytes.toString(gotArray, gotArrayOffset, gotArrayLength));
}
}

TestHFileBlockIndex.java (View File)

@@ -43,7 +43,6 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
@@ -542,15 +541,12 @@ public class TestHFileBlockIndex {
byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
// Key will be interpreted by KeyValue.KEY_COMPARATOR
byte[] k = KeyValueUtil.createFirstOnRow(row, 0, row.length, row, 0, 0,
row, 0, 0).getKey();
byte[] v = TestHFileWriterV2.randomValue(rand);
writer.append(k, v, HConstants.EMPTY_BYTE_ARRAY);
KeyValue kv = KeyValueUtil.createFirstOnRow(row, 0, row.length, row, 0, 0,
row, 0, 0);
byte[] k = kv.getKey();
writer.append(kv);
keys[i] = k;
values[i] = v;
keyStrSet.add(Bytes.toStringBinary(k));
if (i > 0) {
assertTrue(KeyValue.COMPARATOR.compareFlatKey(keys[i - 1],
keys[i]) < 0);

TestHFileInlineToRootChunkConversion.java (View File)

@@ -22,8 +22,8 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
@@ -72,30 +72,17 @@ public class TestHFileInlineToRootChunkConversion {
sb.setLength(0);
byte[] k = Bytes.toBytes(keyStr);
System.out.println("RowKey: " + Bytes.toString(k));
byte[] f = "f1".getBytes();
byte[] q = "q1".getBytes();
int keySize = (int) KeyValue.getKeyDataStructureSize(k.length, f.length, q.length);
byte[] bytes = new byte[keySize];
int pos = 0;
pos = Bytes.putShort(bytes, pos, (short) (k.length & 0x0000ffff));
pos = Bytes.putBytes(bytes, pos, k, 0, k.length);
pos = Bytes.putByte(bytes, pos, (byte) f.length);
pos = Bytes.putBytes(bytes, pos, f, 0, f.length);
pos = Bytes.putBytes(bytes, pos, q, 0, q.length);
pos = Bytes.putLong(bytes, pos, System.currentTimeMillis());
pos = Bytes.putByte(bytes, pos, KeyValue.Type.Put.getCode());
keys.add(bytes);
keys.add(k);
byte[] v = Bytes.toBytes("value" + i);
hfw.append(bytes, v);
hfw.append(CellUtil.createCell(k, v));
}
hfw.close();
HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs, hfPath, cacheConf, conf);
// Scanner doesn't do Cells yet. Fix.
HFileScanner scanner = reader.getScanner(true, true);
for (int i = 0; i < keys.size(); ++i) {
scanner.seekTo(KeyValue.createKeyValueFromKey(keys.get(i)));
scanner.seekTo(CellUtil.createCell(keys.get(i)));
}
reader.close();
}

TestHFilePerformance.java (View File)

@@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
@@ -196,7 +197,7 @@ public class TestHFilePerformance extends AbstractHBaseTool {
for (long l=0; l<rows; l++ ) {
generator.getKey(key);
generator.getValue(value);
writer.append(key, value);
writer.append(CellUtil.createCell(key, value));
totalBytesWritten += key.length;
totalBytesWritten += value.length;
}