HADOOP-2479 Save on number of Text object creations

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@606332 13f79535-47bb-0310-9956-ffa450edef68
Author: Michael Stack
Date: 2007-12-21 21:58:25 +00:00
parent 0465b18a35
commit 87273adb1c
6 changed files with 128 additions and 115 deletions
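The change in a nutshell: HStoreKey.extractFamily and extractQualifier used to allocate and copy a new Text on every call; they now return a TextSequence, a view over the caller's Text, so bytes are copied only when a caller explicitly asks for a standalone Text via toText(). The TextSequence class itself (org.apache.hadoop.hbase.io.TextSequence) is not part of this diff, so the following is only a rough sketch of the idea behind it; the class name TextSequenceSketch and the method sameAs are made up for illustration and are not the committed API.

import org.apache.hadoop.io.Text;

/** Rough sketch of a read-only "slice" over an existing Text; assumed shape, not the real TextSequence. */
class TextSequenceSketch {
  private final Text delegate; // backing Text; never copied by this class
  private final int start;     // inclusive byte offset into delegate
  private final int end;       // exclusive byte offset into delegate

  TextSequenceSketch(final Text delegate, final int start, final int end) {
    this.delegate = delegate;
    this.start = start;
    this.end = end;
  }

  /** @return number of bytes covered by this view */
  int getLength() {
    return this.end - this.start;
  }

  /** Compare the viewed bytes against a whole Text without materializing a copy. */
  boolean sameAs(final Text other) {
    if (other.getLength() != getLength()) {
      return false;
    }
    byte [] mine = this.delegate.getBytes();
    byte [] theirs = other.getBytes();
    for (int i = 0; i < getLength(); i++) {
      if (mine[this.start + i] != theirs[i]) {
        return false;
      }
    }
    return true;
  }

  /** Copy the viewed bytes into a standalone Text; the only place an allocation happens. */
  Text toText() {
    byte [] b = new byte[getLength()];
    System.arraycopy(this.delegate.getBytes(), this.start, b, 0, getLength());
    return new Text(b);
  }
}

With a view like this, extracting the family from a column such as 'info:server' is just offset arithmetic over the existing bytes; the per-call byte [] allocation and System.arraycopy that the old extractFamily did disappears from hot paths such as ColumnMatcher.matches and the HStore cache flush below.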

CHANGES.txt

@@ -19,6 +19,7 @@ Trunk (unreleased changes)
    (Bryan Duxbury via Stack)
 
   OPTIMIZATIONS
+   HADOOP-2479 Save on number of Text object creations
 
   BUG FIXES
    HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test

HAbstractScanner.java

@@ -64,7 +64,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
   private static class ColumnMatcher {
     private boolean wildCardmatch;
     private MATCH_TYPE matchType;
-    private String family;
+    private Text family;
     private Pattern columnMatcher;
     private Text col;
@@ -73,7 +73,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
       try {
         if(qualifier == null || qualifier.getLength() == 0) {
           this.matchType = MATCH_TYPE.FAMILY_ONLY;
-          this.family = HStoreKey.extractFamily(col).toString();
+          this.family = HStoreKey.extractFamily(col).toText();
           this.wildCardmatch = true;
         } else if(isRegexPattern.matcher(qualifier.toString()).matches()) {
           this.matchType = MATCH_TYPE.REGEX;
@@ -93,13 +93,10 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
     boolean matches(Text c) throws IOException {
       if(this.matchType == MATCH_TYPE.SIMPLE) {
         return c.equals(this.col);
-
       } else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
-        return HStoreKey.extractFamily(c).toString().equals(this.family);
-
+        return HStoreKey.extractFamily(c).equals(this.family);
       } else if(this.matchType == MATCH_TYPE.REGEX) {
         return this.columnMatcher.matcher(c.toString()).matches();
-
       } else {
         throw new IOException("Invalid match type: " + this.matchType);
       }
@@ -130,7 +127,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
     this.multipleMatchers = false;
     this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
     for(int i = 0; i < targetCols.length; i++) {
-      Text family = HStoreKey.extractFamily(targetCols[i]);
+      Text family = HStoreKey.extractFamily(targetCols[i]).toText();
      Vector<ColumnMatcher> matchers = okCols.get(family);
      if(matchers == null) {
        matchers = new Vector<ColumnMatcher>();
@@ -160,8 +157,8 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
      */
     boolean columnMatch(int i) throws IOException {
       Text column = keys[i].getColumn();
-      Text family = HStoreKey.extractFamily(column);
-      Vector<ColumnMatcher> matchers = okCols.get(family);
+      Vector<ColumnMatcher> matchers =
+        okCols.get(HStoreKey.extractFamily(column));
       if(matchers == null) {
         return false;
       }

HRegion.java

@@ -283,7 +283,7 @@ public class HRegion implements HConstants {
     long maxSeqId = -1;
     for(Map.Entry<Text, HColumnDescriptor> e :
         this.regionInfo.getTableDesc().families().entrySet()) {
-      Text colFamily = HStoreKey.extractFamily(e.getKey());
+      Text colFamily = HStoreKey.extractFamily(e.getKey()).toText();
       HStore store = new HStore(rootDir, this.regionInfo.getRegionName(),
         this.encodedRegionName, e.getValue(), fs, oldLogFile, conf);

HStore.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.TextSequence;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -565,7 +566,7 @@ class HStore implements HConstants {
     this.regionName = regionName;
     this.encodedRegionName = encodedName;
     this.family = family;
-    this.familyName = HStoreKey.extractFamily(this.family.getName());
+    this.familyName = HStoreKey.extractFamily(this.family.getName()).toText();
     this.compression = SequenceFile.CompressionType.NONE;
     this.storeName = this.encodedRegionName + "/" + this.familyName.toString();
@@ -939,8 +940,8 @@ class HStore implements HConstants {
     try {
       for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
         HStoreKey curkey = es.getKey();
-        if (this.familyName.equals(HStoreKey.extractFamily(
-            curkey.getColumn()))) {
+        TextSequence f = HStoreKey.extractFamily(curkey.getColumn());
+        if (f.equals(this.familyName)) {
           out.append(curkey, new ImmutableBytesWritable(es.getValue()));
         }
       }

HStoreKey.java

@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hbase;
 
+import org.apache.hadoop.hbase.io.TextSequence;
 import org.apache.hadoop.io.*;
 import java.io.*;
@@ -27,94 +28,14 @@ import java.io.*;
  * A Key for a stored row
  */
 public class HStoreKey implements WritableComparable {
-  /**
-   * Colon character in UTF-8
-   */
   public static final char COLUMN_FAMILY_DELIMITER = ':';
 
-  // TODO: Move these utility methods elsewhere (To a Column class?).
-  /**
-   * Extracts the column family name from a column
-   * For example, returns 'info' if the specified column was 'info:server'
-   * @param col name of column
-   * @return column family name
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractFamily(final Text col)
-  throws InvalidColumnNameException {
-    return extractFamily(col, false);
-  }
-
-  /**
-   * Extracts the column family name from a column
-   * For example, returns 'info' if the specified column was 'info:server'
-   * @param col name of column
-   * @param withColon if returned family name should include the ':' suffix.
-   * @return column family name
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractFamily(final Text col, final boolean withColon)
-  throws InvalidColumnNameException {
-    int offset = getColonOffset(col);
-    // Include ':' in copy?
-    offset += (withColon)? 1: 0;
-    if (offset == col.getLength()) {
-      return col;
-    }
-    byte [] buffer = new byte[offset];
-    System.arraycopy(col.getBytes(), 0, buffer, 0, offset);
-    return new Text(buffer);
-  }
-
-  /**
-   * Extracts the column qualifier, the portion that follows the colon (':')
-   * family/qualifier separator.
-   * For example, returns 'server' if the specified column was 'info:server'
-   * @param col name of column
-   * @return column qualifier or null if there is no qualifier.
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractQualifier(final Text col)
-  throws InvalidColumnNameException {
-    int offset = getColonOffset(col);
-    if (offset + 1 == col.getLength()) {
-      return null;
-    }
-    int bufferLength = col.getLength() - (offset + 1);
-    byte [] buffer = new byte[bufferLength];
-    System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength);
-    return new Text(buffer);
-  }
-
-  private static int getColonOffset(final Text col)
-  throws InvalidColumnNameException {
-    int offset = -1;
-    for (int i = 0; i < col.getLength(); i++) {
-      if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
-        offset = i;
-        break;
-      }
-    }
-    if(offset < 0) {
-      throw new InvalidColumnNameException(col + " is missing the colon " +
-        "family/qualifier separator");
-    }
-    return offset;
-  }
-
-  /**
-   * Returns row and column bytes out of an HStoreKey.
-   * @param hsk Store key.
-   * @return byte array encoding of HStoreKey
-   * @throws UnsupportedEncodingException
-   */
-  public static byte[] getBytes(final HStoreKey hsk)
-  throws UnsupportedEncodingException {
-    StringBuilder s = new StringBuilder(hsk.getRow().toString());
-    s.append(hsk.getColumn().toString());
-    return s.toString().getBytes(HConstants.UTF8_ENCODING);
-  }
-
-  Text row;
-  Text column;
-  long timestamp;
+  private Text row;
+  private Text column;
+  private long timestamp;
 
   /** Default constructor used in conjunction with Writable interface */
@@ -163,6 +84,7 @@ public class HStoreKey implements WritableComparable {
   * @param timestamp timestamp value
   */
  public HStoreKey(Text row, Text column, long timestamp) {
+    // Make copies by doing 'new Text(arg)'.
    this.row = new Text(row);
    this.column = new Text(column);
    this.timestamp = timestamp;
@@ -339,4 +261,90 @@ public class HStoreKey implements WritableComparable {
     column.readFields(in);
     timestamp = in.readLong();
   }
+
+  // Statics
+  // TODO: Move these utility methods elsewhere (To a Column class?).
+
+  /**
+   * Extracts the column family name from a column
+   * For example, returns 'info' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column famile as a TextSequence based on the passed
+   * <code>col</code>. If <code>col</code> is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractFamily(final Text col)
+  throws InvalidColumnNameException {
+    return extractFamily(col, false);
+  }
+
+  /**
+   * Extracts the column family name from a column
+   * For example, returns 'info' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column famile as a TextSequence based on the passed
+   * <code>col</code>. If <code>col</code> is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractFamily(final Text col,
+      final boolean withColon)
+  throws InvalidColumnNameException {
+    int offset = getColonOffset(col);
+    // Include ':' in copy?
+    offset += (withColon)? 1: 0;
+    if (offset == col.getLength()) {
+      return new TextSequence(col);
+    }
+    return new TextSequence(col, 0, offset);
+  }
+
+  /**
+   * Extracts the column qualifier, the portion that follows the colon (':')
+   * family/qualifier separator.
+   * For example, returns 'server' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column qualifier as a TextSequence based on the passed
+   * <code>col</code>. If <code>col</code> is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractQualifier(final Text col)
+  throws InvalidColumnNameException {
+    int offset = getColonOffset(col);
+    if (offset + 1 == col.getLength()) {
+      return null;
+    }
+    return new TextSequence(col, offset + 1);
+  }
+
+  private static int getColonOffset(final Text col)
+  throws InvalidColumnNameException {
+    int offset = -1;
+    for (int i = 0; i < col.getLength(); i++) {
+      if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
+        offset = i;
+        break;
+      }
+    }
+    if(offset < 0) {
+      throw new InvalidColumnNameException(col + " is missing the colon " +
+        "family/qualifier separator");
+    }
+    return offset;
+  }
+
+  /**
+   * Returns row and column bytes out of an HStoreKey.
+   * @param hsk Store key.
+   * @return byte array encoding of HStoreKey
+   * @throws UnsupportedEncodingException
+   */
+  public static byte[] getBytes(final HStoreKey hsk)
+  throws UnsupportedEncodingException {
+    StringBuilder s = new StringBuilder(hsk.getRow().toString());
+    s.append(hsk.getColumn().toString());
+    return s.toString().getBytes(HConstants.UTF8_ENCODING);
+  }
 }
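Both caller patterns the new return type encourages appear in the hunks above: HStore compares the returned TextSequence directly against a Text family name, while HRegion and the HStore constructor call toText() because they keep the family after the column Text may be reused. A hypothetical caller (class and method names are made up for this sketch; only the HStoreKey and TextSequence calls come from the diff) would look like:

import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.InvalidColumnNameException;
import org.apache.hadoop.hbase.io.TextSequence;
import org.apache.hadoop.io.Text;

class ExtractFamilyUsage {
  /** Transient use: compare the family view in place; nothing is copied. */
  static boolean sameFamily(final Text column, final Text familyName)
  throws InvalidColumnNameException {
    TextSequence family = HStoreKey.extractFamily(column);
    return family.equals(familyName);
  }

  /** Long-lived use: the family outlives the column Text it was sliced from, so copy it. */
  static Text copyOfFamily(final Text column)
  throws InvalidColumnNameException {
    return HStoreKey.extractFamily(column).toText();
  }
}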

PerformanceEvaluation.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase;
 import java.io.IOException;
 import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
@@ -472,25 +471,28 @@ public class PerformanceEvaluation implements HConstants {
     }
   }
+
+  /*
+   * Format passed integer.
+   * This method takes some time and is done inline uploading data. For
+   * example, doing the mapfile test, generation of the key and value
+   * consumes about 30% of CPU time.
+   * @param i
+   * @return Integer as String zero padded.
+   */
   static Text format(final int i) {
     return new Text(String.format("%010d", Integer.valueOf(i)));
   }
+
   /*
-   * This method takes some time and is done inline uploading data. For
-   * example, doing the mapfile test, generation of the key and value
-   * consumes about 30% of CPU time.
    * @return Generated random value to insert into a table cell.
    */
   static byte[] generateValue(final Random r) {
-    StringBuilder val = new StringBuilder();
-    while(val.length() < ROW_LENGTH) {
-      val.append(Long.toString(r.nextLong()));
-    }
-    byte[] value = null;
-    try {
-      value = val.toString().getBytes(HConstants.UTF8_ENCODING);
-    } catch (UnsupportedEncodingException e) {
-      assert(false);
-    }
-    return value;
+    byte [] b = new byte [ROW_LENGTH];
+    r.nextBytes(b);
+    return b;
   }
+
   static Text getRandomRow(final Random random, final int totalRows) {
@@ -556,7 +558,7 @@ public class PerformanceEvaluation implements HConstants {
     Random random = new Random();
     Configuration c = new Configuration();
     FileSystem fs = FileSystem.get(c);
-    Path mf = new Path("performanceevaluation.mapfile");
+    Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
     if (fs.exists(mf)) {
       fs.delete(mf);
     }
@@ -571,7 +573,9 @@ public class PerformanceEvaluation implements HConstants {
     }
     writer.close();
     LOG.info("Writing " + ROW_COUNT + " records took " +
-      (System.currentTimeMillis() - startTime) + "ms");
+      (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
+      "and values is done inline and has been seen to consume " +
+      "significant time: e.g. ~30% of cpu time");
     // Do random reads.
     LOG.info("Reading " + ROW_COUNT + " random rows");
     MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
@@ -585,7 +589,9 @@ public class PerformanceEvaluation implements HConstants {
     }
     reader.close();
     LOG.info("Reading " + ROW_COUNT + " random records took " +
-      (System.currentTimeMillis() - startTime) + "ms");
+      (System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
+      "random key is done in line and takes a significant amount of cpu " +
+      "time: e.g 10-15%");
     // Do random reads.
     LOG.info("Reading " + ROW_COUNT + " rows sequentially");
     reader = new MapFile.Reader(fs, mf.toString(), c);