HADOOP-2479 Save on number of Text object creations

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@606332 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2007-12-21 21:58:25 +00:00
parent 0465b18a35
commit 87273adb1c
6 changed files with 128 additions and 115 deletions

View File

@ -19,6 +19,7 @@ Trunk (unreleased changes)
(Bryan Duxbury via Stack)
OPTIMIZATIONS
HADOOP-2479 Save on number of Text object creations
BUG FIXES
HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test

View File

@ -64,7 +64,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
private static class ColumnMatcher {
private boolean wildCardmatch;
private MATCH_TYPE matchType;
private String family;
private Text family;
private Pattern columnMatcher;
private Text col;
@ -73,7 +73,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
try {
if(qualifier == null || qualifier.getLength() == 0) {
this.matchType = MATCH_TYPE.FAMILY_ONLY;
this.family = HStoreKey.extractFamily(col).toString();
this.family = HStoreKey.extractFamily(col).toText();
this.wildCardmatch = true;
} else if(isRegexPattern.matcher(qualifier.toString()).matches()) {
this.matchType = MATCH_TYPE.REGEX;
@ -93,13 +93,10 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
boolean matches(Text c) throws IOException {
if(this.matchType == MATCH_TYPE.SIMPLE) {
return c.equals(this.col);
} else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
return HStoreKey.extractFamily(c).toString().equals(this.family);
return HStoreKey.extractFamily(c).equals(this.family);
} else if(this.matchType == MATCH_TYPE.REGEX) {
return this.columnMatcher.matcher(c.toString()).matches();
} else {
throw new IOException("Invalid match type: " + this.matchType);
}
@ -130,7 +127,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
this.multipleMatchers = false;
this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
for(int i = 0; i < targetCols.length; i++) {
Text family = HStoreKey.extractFamily(targetCols[i]);
Text family = HStoreKey.extractFamily(targetCols[i]).toText();
Vector<ColumnMatcher> matchers = okCols.get(family);
if(matchers == null) {
matchers = new Vector<ColumnMatcher>();
@ -160,8 +157,8 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
*/
boolean columnMatch(int i) throws IOException {
Text column = keys[i].getColumn();
Text family = HStoreKey.extractFamily(column);
Vector<ColumnMatcher> matchers = okCols.get(family);
Vector<ColumnMatcher> matchers =
okCols.get(HStoreKey.extractFamily(column));
if(matchers == null) {
return false;
}

View File

@ -283,7 +283,7 @@ public class HRegion implements HConstants {
long maxSeqId = -1;
for(Map.Entry<Text, HColumnDescriptor> e :
this.regionInfo.getTableDesc().families().entrySet()) {
Text colFamily = HStoreKey.extractFamily(e.getKey());
Text colFamily = HStoreKey.extractFamily(e.getKey()).toText();
HStore store = new HStore(rootDir, this.regionInfo.getRegionName(),
this.encodedRegionName, e.getValue(), fs, oldLogFile, conf);

View File

@ -44,6 +44,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.TextSequence;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
@ -565,7 +566,7 @@ class HStore implements HConstants {
this.regionName = regionName;
this.encodedRegionName = encodedName;
this.family = family;
this.familyName = HStoreKey.extractFamily(this.family.getName());
this.familyName = HStoreKey.extractFamily(this.family.getName()).toText();
this.compression = SequenceFile.CompressionType.NONE;
this.storeName = this.encodedRegionName + "/" + this.familyName.toString();
@ -939,8 +940,8 @@ class HStore implements HConstants {
try {
for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
HStoreKey curkey = es.getKey();
if (this.familyName.equals(HStoreKey.extractFamily(
curkey.getColumn()))) {
TextSequence f = HStoreKey.extractFamily(curkey.getColumn());
if (f.equals(this.familyName)) {
out.append(curkey, new ImmutableBytesWritable(es.getValue()));
}
}

View File

@ -19,6 +19,7 @@
*/
package org.apache.hadoop.hbase;
import org.apache.hadoop.hbase.io.TextSequence;
import org.apache.hadoop.io.*;
import java.io.*;
@ -27,94 +28,14 @@ import java.io.*;
* A Key for a stored row
*/
public class HStoreKey implements WritableComparable {
/**
* Colon character in UTF-8
*/
public static final char COLUMN_FAMILY_DELIMITER = ':';
// TODO: Move these utility methods elsewhere (To a Column class?).
/**
* Extracts the column family name from a column
* For example, returns 'info' if the specified column was 'info:server'
* @param col name of column
* @return column family name
* @throws InvalidColumnNameException
*/
public static Text extractFamily(final Text col)
throws InvalidColumnNameException {
return extractFamily(col, false);
}
/**
* Extracts the column family name from a column
* For example, returns 'info' if the specified column was 'info:server'
* @param col name of column
* @param withColon if returned family name should include the ':' suffix.
* @return column family name
* @throws InvalidColumnNameException
*/
public static Text extractFamily(final Text col, final boolean withColon)
throws InvalidColumnNameException {
int offset = getColonOffset(col);
// Include ':' in copy?
offset += (withColon)? 1: 0;
if (offset == col.getLength()) {
return col;
}
byte [] buffer = new byte[offset];
System.arraycopy(col.getBytes(), 0, buffer, 0, offset);
return new Text(buffer);
}
/**
* Extracts the column qualifier, the portion that follows the colon (':')
* family/qualifier separator.
* For example, returns 'server' if the specified column was 'info:server'
* @param col name of column
* @return column qualifier or null if there is no qualifier.
* @throws InvalidColumnNameException
*/
public static Text extractQualifier(final Text col)
throws InvalidColumnNameException {
int offset = getColonOffset(col);
if (offset + 1 == col.getLength()) {
return null;
}
int bufferLength = col.getLength() - (offset + 1);
byte [] buffer = new byte[bufferLength];
System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength);
return new Text(buffer);
}
private static int getColonOffset(final Text col)
throws InvalidColumnNameException {
int offset = -1;
for (int i = 0; i < col.getLength(); i++) {
if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
offset = i;
break;
}
}
if(offset < 0) {
throw new InvalidColumnNameException(col + " is missing the colon " +
"family/qualifier separator");
}
return offset;
}
/**
* Returns row and column bytes out of an HStoreKey.
* @param hsk Store key.
* @return byte array encoding of HStoreKey
* @throws UnsupportedEncodingException
*/
public static byte[] getBytes(final HStoreKey hsk)
throws UnsupportedEncodingException {
StringBuilder s = new StringBuilder(hsk.getRow().toString());
s.append(hsk.getColumn().toString());
return s.toString().getBytes(HConstants.UTF8_ENCODING);
}
Text row;
Text column;
long timestamp;
private Text row;
private Text column;
private long timestamp;
/** Default constructor used in conjunction with Writable interface */
@ -163,6 +84,7 @@ public class HStoreKey implements WritableComparable {
* @param timestamp timestamp value
*/
public HStoreKey(Text row, Text column, long timestamp) {
// Make copies by doing 'new Text(arg)'.
this.row = new Text(row);
this.column = new Text(column);
this.timestamp = timestamp;
@ -339,4 +261,90 @@ public class HStoreKey implements WritableComparable {
column.readFields(in);
timestamp = in.readLong();
}
// Statics
// TODO: Move these utility methods elsewhere (To a Column class?).
/**
* Extracts the column family name from a column, excluding the ':' delimiter.
* For example, returns 'info' if the specified column was 'info:server'
* Delegates to {@link #extractFamily(Text, boolean)} with withColon=false.
* @param col name of column
* @return column family as a TextSequence based on the passed
* <code>col</code>. If <code>col</code> is reused, make a new Text of
* the result by calling {@link TextSequence#toText()}.
* @throws InvalidColumnNameException if <code>col</code> has no colon delimiter
*/
public static TextSequence extractFamily(final Text col)
throws InvalidColumnNameException {
return extractFamily(col, false);
}
/**
* Extracts the column family name from a column
* For example, returns 'info' if the specified column was 'info:server'
* @param col name of column
* @param withColon if true, the returned family name includes the trailing
* ':' family/qualifier delimiter
* @return column family as a TextSequence based on the passed
* <code>col</code>. If <code>col</code> is reused, make a new Text of
* the result by calling {@link TextSequence#toText()}.
* @throws InvalidColumnNameException if <code>col</code> has no colon delimiter
*/
public static TextSequence extractFamily(final Text col,
final boolean withColon)
throws InvalidColumnNameException {
int offset = getColonOffset(col);
// Include ':' in copy?
offset += (withColon)? 1: 0;
if (offset == col.getLength()) {
// Family (plus optional colon) spans the whole column name; no copy range
// needed -- wrap the entire column.
return new TextSequence(col);
}
return new TextSequence(col, 0, offset);
}
/**
* Extracts the column qualifier, the portion that follows the colon (':')
* family/qualifier separator.
* For example, returns 'server' if the specified column was 'info:server'
* @param col name of column
* @return column qualifier as a TextSequence based on the passed
* <code>col</code>, or null if there is no qualifier (i.e. the column ends
* at the colon). If <code>col</code> is reused, make a new Text of
* the result by calling {@link TextSequence#toText()}.
* @throws InvalidColumnNameException if <code>col</code> has no colon delimiter
*/
public static TextSequence extractQualifier(final Text col)
throws InvalidColumnNameException {
int offset = getColonOffset(col);
if (offset + 1 == col.getLength()) {
// Nothing after the colon: no qualifier.
return null;
}
return new TextSequence(col, offset + 1);
}
/**
* Finds the index of the first ':' family/qualifier delimiter in the passed
* column name.
* @param col name of column
* @return zero-based offset of the delimiter within <code>col</code>
* @throws InvalidColumnNameException if <code>col</code> contains no colon
*/
private static int getColonOffset(final Text col)
throws InvalidColumnNameException {
final int length = col.getLength();
for (int index = 0; index < length; index++) {
if (col.charAt(index) == COLUMN_FAMILY_DELIMITER) {
return index;
}
}
throw new InvalidColumnNameException(col + " is missing the colon " +
"family/qualifier separator");
}
/**
* Returns row and column bytes out of an HStoreKey.
* @param hsk Store key.
* @return byte array encoding of HStoreKey (row bytes immediately followed
* by column bytes, UTF-8 encoded)
* @throws UnsupportedEncodingException
*/
public static byte[] getBytes(final HStoreKey hsk)
throws UnsupportedEncodingException {
String rowAndColumn = hsk.getRow().toString() + hsk.getColumn().toString();
return rowAndColumn.getBytes(HConstants.UTF8_ENCODING);
}
}

View File

@ -21,7 +21,6 @@ package org.apache.hadoop.hbase;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
@ -472,25 +471,28 @@ public class PerformanceEvaluation implements HConstants {
}
}
/*
* Format passed integer as a zero-padded, ten-digit row key.
* This method takes some time and is done inline uploading data. For
* example, doing the mapfile test, generation of the key and value
* consumes about 30% of CPU time.
* @param i
* @return Integer as String zero padded.
*/
static Text format(final int i) {
String padded = String.format("%010d", Integer.valueOf(i));
return new Text(padded);
}
/*
* This method takes some time and is done inline uploading data. For
* example, doing the mapfile test, generation of the key and value
* consumes about 30% of CPU time.
* @return Generated random value to insert into a table cell.
*/
static byte[] generateValue(final Random r) {
StringBuilder val = new StringBuilder();
while(val.length() < ROW_LENGTH) {
val.append(Long.toString(r.nextLong()));
}
byte[] value = null;
try {
value = val.toString().getBytes(HConstants.UTF8_ENCODING);
} catch (UnsupportedEncodingException e) {
assert(false);
}
return value;
byte [] b = new byte [ROW_LENGTH];
r.nextBytes(b);
return b;
}
static Text getRandomRow(final Random random, final int totalRows) {
@ -556,7 +558,7 @@ public class PerformanceEvaluation implements HConstants {
Random random = new Random();
Configuration c = new Configuration();
FileSystem fs = FileSystem.get(c);
Path mf = new Path("performanceevaluation.mapfile");
Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
if (fs.exists(mf)) {
fs.delete(mf);
}
@ -571,7 +573,9 @@ public class PerformanceEvaluation implements HConstants {
}
writer.close();
LOG.info("Writing " + ROW_COUNT + " records took " +
(System.currentTimeMillis() - startTime) + "ms");
(System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
"and values is done inline and has been seen to consume " +
"significant time: e.g. ~30% of cpu time");
// Do random reads.
LOG.info("Reading " + ROW_COUNT + " random rows");
MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
@ -585,7 +589,9 @@ public class PerformanceEvaluation implements HConstants {
}
reader.close();
LOG.info("Reading " + ROW_COUNT + " random records took " +
(System.currentTimeMillis() - startTime) + "ms");
(System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
"random key is done in line and takes a significant amount of cpu " +
"time: e.g 10-15%");
// Do random reads.
LOG.info("Reading " + ROW_COUNT + " rows sequentially");
reader = new MapFile.Reader(fs, mf.toString(), c);