HADOOP-2479 Save on number of Text object creations
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@606332 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0465b18a35
commit
87273adb1c
|
@ -19,6 +19,7 @@ Trunk (unreleased changes)
|
|||
(Bryan Duxbury via Stack)
|
||||
|
||||
OPTIMIZATIONS
|
||||
HADOOP-2479 Save on number of Text object creations
|
||||
|
||||
BUG FIXES
|
||||
HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test
|
||||
|
|
|
@ -64,7 +64,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
|
|||
private static class ColumnMatcher {
|
||||
private boolean wildCardmatch;
|
||||
private MATCH_TYPE matchType;
|
||||
private String family;
|
||||
private Text family;
|
||||
private Pattern columnMatcher;
|
||||
private Text col;
|
||||
|
||||
|
@ -73,7 +73,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
|
|||
try {
|
||||
if(qualifier == null || qualifier.getLength() == 0) {
|
||||
this.matchType = MATCH_TYPE.FAMILY_ONLY;
|
||||
this.family = HStoreKey.extractFamily(col).toString();
|
||||
this.family = HStoreKey.extractFamily(col).toText();
|
||||
this.wildCardmatch = true;
|
||||
} else if(isRegexPattern.matcher(qualifier.toString()).matches()) {
|
||||
this.matchType = MATCH_TYPE.REGEX;
|
||||
|
@ -93,13 +93,10 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
|
|||
boolean matches(Text c) throws IOException {
|
||||
if(this.matchType == MATCH_TYPE.SIMPLE) {
|
||||
return c.equals(this.col);
|
||||
|
||||
} else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
|
||||
return HStoreKey.extractFamily(c).toString().equals(this.family);
|
||||
|
||||
return HStoreKey.extractFamily(c).equals(this.family);
|
||||
} else if(this.matchType == MATCH_TYPE.REGEX) {
|
||||
return this.columnMatcher.matcher(c.toString()).matches();
|
||||
|
||||
} else {
|
||||
throw new IOException("Invalid match type: " + this.matchType);
|
||||
}
|
||||
|
@ -130,7 +127,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
|
|||
this.multipleMatchers = false;
|
||||
this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
|
||||
for(int i = 0; i < targetCols.length; i++) {
|
||||
Text family = HStoreKey.extractFamily(targetCols[i]);
|
||||
Text family = HStoreKey.extractFamily(targetCols[i]).toText();
|
||||
Vector<ColumnMatcher> matchers = okCols.get(family);
|
||||
if(matchers == null) {
|
||||
matchers = new Vector<ColumnMatcher>();
|
||||
|
@ -160,8 +157,8 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
|
|||
*/
|
||||
boolean columnMatch(int i) throws IOException {
|
||||
Text column = keys[i].getColumn();
|
||||
Text family = HStoreKey.extractFamily(column);
|
||||
Vector<ColumnMatcher> matchers = okCols.get(family);
|
||||
Vector<ColumnMatcher> matchers =
|
||||
okCols.get(HStoreKey.extractFamily(column));
|
||||
if(matchers == null) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -283,7 +283,7 @@ public class HRegion implements HConstants {
|
|||
long maxSeqId = -1;
|
||||
for(Map.Entry<Text, HColumnDescriptor> e :
|
||||
this.regionInfo.getTableDesc().families().entrySet()) {
|
||||
Text colFamily = HStoreKey.extractFamily(e.getKey());
|
||||
Text colFamily = HStoreKey.extractFamily(e.getKey()).toText();
|
||||
|
||||
HStore store = new HStore(rootDir, this.regionInfo.getRegionName(),
|
||||
this.encodedRegionName, e.getValue(), fs, oldLogFile, conf);
|
||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.hadoop.fs.FileSystem;
|
|||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterInterface;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.TextSequence;
|
||||
import org.apache.hadoop.io.MapFile;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
@ -565,7 +566,7 @@ class HStore implements HConstants {
|
|||
this.regionName = regionName;
|
||||
this.encodedRegionName = encodedName;
|
||||
this.family = family;
|
||||
this.familyName = HStoreKey.extractFamily(this.family.getName());
|
||||
this.familyName = HStoreKey.extractFamily(this.family.getName()).toText();
|
||||
this.compression = SequenceFile.CompressionType.NONE;
|
||||
this.storeName = this.encodedRegionName + "/" + this.familyName.toString();
|
||||
|
||||
|
@ -939,8 +940,8 @@ class HStore implements HConstants {
|
|||
try {
|
||||
for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
|
||||
HStoreKey curkey = es.getKey();
|
||||
if (this.familyName.equals(HStoreKey.extractFamily(
|
||||
curkey.getColumn()))) {
|
||||
TextSequence f = HStoreKey.extractFamily(curkey.getColumn());
|
||||
if (f.equals(this.familyName)) {
|
||||
out.append(curkey, new ImmutableBytesWritable(es.getValue()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import org.apache.hadoop.hbase.io.TextSequence;
|
||||
import org.apache.hadoop.io.*;
|
||||
|
||||
import java.io.*;
|
||||
|
@ -27,94 +28,14 @@ import java.io.*;
|
|||
* A Key for a stored row
|
||||
*/
|
||||
public class HStoreKey implements WritableComparable {
|
||||
/**
|
||||
* Colon character in UTF-8
|
||||
*/
|
||||
public static final char COLUMN_FAMILY_DELIMITER = ':';
|
||||
|
||||
// TODO: Move these utility methods elsewhere (To a Column class?).
|
||||
/**
|
||||
* Extracts the column family name from a column
|
||||
* For example, returns 'info' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @return column family name
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static Text extractFamily(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
return extractFamily(col, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the column family name from a column
|
||||
* For example, returns 'info' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @param withColon if returned family name should include the ':' suffix.
|
||||
* @return column family name
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static Text extractFamily(final Text col, final boolean withColon)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = getColonOffset(col);
|
||||
// Include ':' in copy?
|
||||
offset += (withColon)? 1: 0;
|
||||
if (offset == col.getLength()) {
|
||||
return col;
|
||||
}
|
||||
byte [] buffer = new byte[offset];
|
||||
System.arraycopy(col.getBytes(), 0, buffer, 0, offset);
|
||||
return new Text(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the column qualifier, the portion that follows the colon (':')
|
||||
* family/qualifier separator.
|
||||
* For example, returns 'server' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @return column qualifier or null if there is no qualifier.
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static Text extractQualifier(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = getColonOffset(col);
|
||||
if (offset + 1 == col.getLength()) {
|
||||
return null;
|
||||
}
|
||||
int bufferLength = col.getLength() - (offset + 1);
|
||||
byte [] buffer = new byte[bufferLength];
|
||||
System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength);
|
||||
return new Text(buffer);
|
||||
}
|
||||
|
||||
private static int getColonOffset(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = -1;
|
||||
for (int i = 0; i < col.getLength(); i++) {
|
||||
if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
|
||||
offset = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(offset < 0) {
|
||||
throw new InvalidColumnNameException(col + " is missing the colon " +
|
||||
"family/qualifier separator");
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns row and column bytes out of an HStoreKey.
|
||||
* @param hsk Store key.
|
||||
* @return byte array encoding of HStoreKey
|
||||
* @throws UnsupportedEncodingException
|
||||
*/
|
||||
public static byte[] getBytes(final HStoreKey hsk)
|
||||
throws UnsupportedEncodingException {
|
||||
StringBuilder s = new StringBuilder(hsk.getRow().toString());
|
||||
s.append(hsk.getColumn().toString());
|
||||
return s.toString().getBytes(HConstants.UTF8_ENCODING);
|
||||
}
|
||||
|
||||
Text row;
|
||||
Text column;
|
||||
long timestamp;
|
||||
private Text row;
|
||||
private Text column;
|
||||
private long timestamp;
|
||||
|
||||
|
||||
/** Default constructor used in conjunction with Writable interface */
|
||||
|
@ -163,6 +84,7 @@ public class HStoreKey implements WritableComparable {
|
|||
* @param timestamp timestamp value
|
||||
*/
|
||||
public HStoreKey(Text row, Text column, long timestamp) {
|
||||
// Make copies by doing 'new Text(arg)'.
|
||||
this.row = new Text(row);
|
||||
this.column = new Text(column);
|
||||
this.timestamp = timestamp;
|
||||
|
@ -339,4 +261,90 @@ public class HStoreKey implements WritableComparable {
|
|||
column.readFields(in);
|
||||
timestamp = in.readLong();
|
||||
}
|
||||
|
||||
// Statics
|
||||
// TODO: Move these utility methods elsewhere (To a Column class?).
|
||||
|
||||
/**
|
||||
* Extracts the column family name from a column
|
||||
* For example, returns 'info' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @return column family as a TextSequence based on the passed
|
||||
* <code>col</code>. If <code>col</code> is reused, make a new Text of
|
||||
* the result by calling {@link TextSequence#toText()}.
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static TextSequence extractFamily(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
return extractFamily(col, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the column family name from a column
|
||||
* For example, returns 'info' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @return column family as a TextSequence based on the passed
|
||||
* <code>col</code>. If <code>col</code> is reused, make a new Text of
|
||||
* the result by calling {@link TextSequence#toText()}.
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static TextSequence extractFamily(final Text col,
|
||||
final boolean withColon)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = getColonOffset(col);
|
||||
// Include ':' in copy?
|
||||
offset += (withColon)? 1: 0;
|
||||
if (offset == col.getLength()) {
|
||||
return new TextSequence(col);
|
||||
}
|
||||
return new TextSequence(col, 0, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the column qualifier, the portion that follows the colon (':')
|
||||
* family/qualifier separator.
|
||||
* For example, returns 'server' if the specified column was 'info:server'
|
||||
* @param col name of column
|
||||
* @return column qualifier as a TextSequence based on the passed
|
||||
* <code>col</code>. If <code>col</code> is reused, make a new Text of
|
||||
* the result by calling {@link TextSequence#toText()}.
|
||||
* @throws InvalidColumnNameException
|
||||
*/
|
||||
public static TextSequence extractQualifier(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = getColonOffset(col);
|
||||
if (offset + 1 == col.getLength()) {
|
||||
return null;
|
||||
}
|
||||
return new TextSequence(col, offset + 1);
|
||||
}
|
||||
|
||||
private static int getColonOffset(final Text col)
|
||||
throws InvalidColumnNameException {
|
||||
int offset = -1;
|
||||
for (int i = 0; i < col.getLength(); i++) {
|
||||
if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
|
||||
offset = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(offset < 0) {
|
||||
throw new InvalidColumnNameException(col + " is missing the colon " +
|
||||
"family/qualifier separator");
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns row and column bytes out of an HStoreKey.
|
||||
* @param hsk Store key.
|
||||
* @return byte array encoding of HStoreKey
|
||||
* @throws UnsupportedEncodingException
|
||||
*/
|
||||
public static byte[] getBytes(final HStoreKey hsk)
|
||||
throws UnsupportedEncodingException {
|
||||
StringBuilder s = new StringBuilder(hsk.getRow().toString());
|
||||
s.append(hsk.getColumn().toString());
|
||||
return s.toString().getBytes(HConstants.UTF8_ENCODING);
|
||||
}
|
||||
}
|
|
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
|
@ -472,25 +471,28 @@ public class PerformanceEvaluation implements HConstants {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Format passed integer.
|
||||
* This method takes some time and is done inline while uploading data. For
|
||||
* example, doing the mapfile test, generation of the key and value
|
||||
* consumes about 30% of CPU time.
|
||||
* @param i
|
||||
* @return Integer as String zero padded.
|
||||
*/
|
||||
static Text format(final int i) {
|
||||
return new Text(String.format("%010d", Integer.valueOf(i)));
|
||||
}
|
||||
|
||||
/*
|
||||
* This method takes some time and is done inline while uploading data. For
|
||||
* example, doing the mapfile test, generation of the key and value
|
||||
* consumes about 30% of CPU time.
|
||||
* @return Generated random value to insert into a table cell.
|
||||
*/
|
||||
static byte[] generateValue(final Random r) {
|
||||
StringBuilder val = new StringBuilder();
|
||||
while(val.length() < ROW_LENGTH) {
|
||||
val.append(Long.toString(r.nextLong()));
|
||||
}
|
||||
byte[] value = null;
|
||||
try {
|
||||
value = val.toString().getBytes(HConstants.UTF8_ENCODING);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
assert(false);
|
||||
}
|
||||
return value;
|
||||
byte [] b = new byte [ROW_LENGTH];
|
||||
r.nextBytes(b);
|
||||
return b;
|
||||
}
|
||||
|
||||
static Text getRandomRow(final Random random, final int totalRows) {
|
||||
|
@ -556,7 +558,7 @@ public class PerformanceEvaluation implements HConstants {
|
|||
Random random = new Random();
|
||||
Configuration c = new Configuration();
|
||||
FileSystem fs = FileSystem.get(c);
|
||||
Path mf = new Path("performanceevaluation.mapfile");
|
||||
Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
|
||||
if (fs.exists(mf)) {
|
||||
fs.delete(mf);
|
||||
}
|
||||
|
@ -571,7 +573,9 @@ public class PerformanceEvaluation implements HConstants {
|
|||
}
|
||||
writer.close();
|
||||
LOG.info("Writing " + ROW_COUNT + " records took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms");
|
||||
(System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
|
||||
"and values is done inline and has been seen to consume " +
|
||||
"significant time: e.g. ~30% of cpu time");
|
||||
// Do random reads.
|
||||
LOG.info("Reading " + ROW_COUNT + " random rows");
|
||||
MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
|
||||
|
@ -585,7 +589,9 @@ public class PerformanceEvaluation implements HConstants {
|
|||
}
|
||||
reader.close();
|
||||
LOG.info("Reading " + ROW_COUNT + " random records took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms");
|
||||
(System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
|
||||
"random key is done in line and takes a significant amount of cpu " +
|
||||
"time: e.g 10-15%");
|
||||
// Do sequential reads.
|
||||
LOG.info("Reading " + ROW_COUNT + " rows sequentially");
|
||||
reader = new MapFile.Reader(fs, mf.toString(), c);
|
||||
|
@ -599,7 +605,7 @@ public class PerformanceEvaluation implements HConstants {
|
|||
LOG.info("Reading " + ROW_COUNT + " records serially took " +
|
||||
(System.currentTimeMillis() - startTime) + "ms");
|
||||
}
|
||||
|
||||
|
||||
private void runTest(final String cmd) throws IOException {
|
||||
if (cmd.equals(RANDOM_READ_MEM)) {
|
||||
// For this one test, so all fits in memory, make R smaller (See
|
||||
|
|
Loading…
Reference in New Issue