From 87273adb1c1c4be497e7867a6d70540c4c0f7044 Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Fri, 21 Dec 2007 21:58:25 +0000
Subject: [PATCH] HADOOP-2479 Save on number of Text object creations

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@606332 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   1 +
 .../apache/hadoop/hbase/HAbstractScanner.java |  15 +-
 src/java/org/apache/hadoop/hbase/HRegion.java |   2 +-
 src/java/org/apache/hadoop/hbase/HStore.java  |   7 +-
 .../org/apache/hadoop/hbase/HStoreKey.java    | 180 +++++++++---------
 .../hadoop/hbase/PerformanceEvaluation.java   |  38 ++--
 6 files changed, 128 insertions(+), 115 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 3235ab9e5b1..3a31fb41bf6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -19,6 +19,7 @@ Trunk (unreleased changes)
    (Bryan Duxbury via Stack)
 
   OPTIMIZATIONS
+   HADOOP-2479 Save on number of Text object creations
 
   BUG FIXES
    HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test
diff --git a/src/java/org/apache/hadoop/hbase/HAbstractScanner.java b/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
index 952f4b3b346..731a406067e 100644
--- a/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
+++ b/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
@@ -64,7 +64,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
   private static class ColumnMatcher {
     private boolean wildCardmatch;
     private MATCH_TYPE matchType;
-    private String family;
+    private Text family;
     private Pattern columnMatcher;
     private Text col;
 
@@ -73,7 +73,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
       try {
         if(qualifier == null || qualifier.getLength() == 0) {
           this.matchType = MATCH_TYPE.FAMILY_ONLY;
-          this.family = HStoreKey.extractFamily(col).toString();
+          this.family = HStoreKey.extractFamily(col).toText();
           this.wildCardmatch = true;
         } else if(isRegexPattern.matcher(qualifier.toString()).matches()) {
           this.matchType = MATCH_TYPE.REGEX;
@@ -93,13 +93,10 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
     boolean matches(Text c) throws IOException {
       if(this.matchType == MATCH_TYPE.SIMPLE) {
         return c.equals(this.col);
-        
       } else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
-        return HStoreKey.extractFamily(c).toString().equals(this.family);
-        
+        return HStoreKey.extractFamily(c).equals(this.family);
       } else if(this.matchType == MATCH_TYPE.REGEX) {
         return this.columnMatcher.matcher(c.toString()).matches();
-        
       } else {
         throw new IOException("Invalid match type: " + this.matchType);
       }
     }
@@ -130,7 +127,7 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
     this.multipleMatchers = false;
     this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
     for(int i = 0; i < targetCols.length; i++) {
-      Text family = HStoreKey.extractFamily(targetCols[i]);
+      Text family = HStoreKey.extractFamily(targetCols[i]).toText();
       Vector<ColumnMatcher> matchers = okCols.get(family);
       if(matchers == null) {
         matchers = new Vector<ColumnMatcher>();
@@ -160,8 +157,8 @@ public abstract class HAbstractScanner implements HInternalScannerInterface {
    */
   boolean columnMatch(int i) throws IOException {
     Text column = keys[i].getColumn();
-    Text family = HStoreKey.extractFamily(column);
-    Vector<ColumnMatcher> matchers = okCols.get(family);
+    Vector<ColumnMatcher> matchers =
+      okCols.get(HStoreKey.extractFamily(column));
     if(matchers == null) {
       return false;
     }
diff --git a/src/java/org/apache/hadoop/hbase/HRegion.java b/src/java/org/apache/hadoop/hbase/HRegion.java
index f743ebad831..95d49187f0c 100644
--- a/src/java/org/apache/hadoop/hbase/HRegion.java
+++ b/src/java/org/apache/hadoop/hbase/HRegion.java
@@ -283,7 +283,7 @@ public class HRegion implements HConstants {
     long maxSeqId = -1;
     for(Map.Entry<Text, HColumnDescriptor> e :
         this.regionInfo.getTableDesc().families().entrySet()) {
-      Text colFamily = HStoreKey.extractFamily(e.getKey());
+      Text colFamily = HStoreKey.extractFamily(e.getKey()).toText();
       HStore store = new HStore(rootDir, this.regionInfo.getRegionName(),
         this.encodedRegionName, e.getValue(), fs, oldLogFile, conf);
diff --git a/src/java/org/apache/hadoop/hbase/HStore.java b/src/java/org/apache/hadoop/hbase/HStore.java
index ff191fca18d..8ea55696554 100644
--- a/src/java/org/apache/hadoop/hbase/HStore.java
+++ b/src/java/org/apache/hadoop/hbase/HStore.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.TextSequence;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -565,7 +566,7 @@ class HStore implements HConstants {
     this.regionName = regionName;
     this.encodedRegionName = encodedName;
     this.family = family;
-    this.familyName = HStoreKey.extractFamily(this.family.getName());
+    this.familyName = HStoreKey.extractFamily(this.family.getName()).toText();
     this.compression = SequenceFile.CompressionType.NONE;
     this.storeName = this.encodedRegionName + "/" + this.familyName.toString();
@@ -939,8 +940,8 @@ class HStore implements HConstants {
     try {
       for (Map.Entry<HStoreKey, byte []> es: cache.entrySet()) {
         HStoreKey curkey = es.getKey();
-        if (this.familyName.equals(HStoreKey.extractFamily(
-            curkey.getColumn()))) {
+        TextSequence f = HStoreKey.extractFamily(curkey.getColumn());
+        if (f.equals(this.familyName)) {
           out.append(curkey, new ImmutableBytesWritable(es.getValue()));
         }
       }
diff --git a/src/java/org/apache/hadoop/hbase/HStoreKey.java b/src/java/org/apache/hadoop/hbase/HStoreKey.java
index 0507d8bcf06..edde010e7b0 100644
--- a/src/java/org/apache/hadoop/hbase/HStoreKey.java
+++ b/src/java/org/apache/hadoop/hbase/HStoreKey.java
@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hbase;
 
+import org.apache.hadoop.hbase.io.TextSequence;
 import org.apache.hadoop.io.*;
 
 import java.io.*;
@@ -27,94 +28,14 @@ import java.io.*;
  * A Key for a stored row
  */
 public class HStoreKey implements WritableComparable {
+  /**
+   * Colon character in UTF-8
+   */
   public static final char COLUMN_FAMILY_DELIMITER = ':';
 
-  // TODO: Move these utility methods elsewhere (To a Column class?).
-  /**
-   * Extracts the column family name from a column
-   * For example, returns 'info' if the specified column was 'info:server'
-   * @param col name of column
-   * @return column family name
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractFamily(final Text col)
-  throws InvalidColumnNameException {
-    return extractFamily(col, false);
-  }
-  
-  /**
-   * Extracts the column family name from a column
-   * For example, returns 'info' if the specified column was 'info:server'
-   * @param col name of column
-   * @param withColon if returned family name should include the ':' suffix.
-   * @return column family name
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractFamily(final Text col, final boolean withColon)
-  throws InvalidColumnNameException {
-    int offset = getColonOffset(col);
-    // Include ':' in copy?
-    offset += (withColon)? 1: 0;
-    if (offset == col.getLength()) {
-      return col;
-    }
-    byte [] buffer = new byte[offset];
-    System.arraycopy(col.getBytes(), 0, buffer, 0, offset);
-    return new Text(buffer);
-  }
-  
-  /**
-   * Extracts the column qualifier, the portion that follows the colon (':')
-   * family/qualifier separator.
-   * For example, returns 'server' if the specified column was 'info:server'
-   * @param col name of column
-   * @return column qualifier or null if there is no qualifier.
-   * @throws InvalidColumnNameException
-   */
-  public static Text extractQualifier(final Text col)
-  throws InvalidColumnNameException {
-    int offset = getColonOffset(col);
-    if (offset + 1 == col.getLength()) {
-      return null;
-    }
-    int bufferLength = col.getLength() - (offset + 1);
-    byte [] buffer = new byte[bufferLength];
-    System.arraycopy(col.getBytes(), offset + 1, buffer, 0, bufferLength);
-    return new Text(buffer);
-  }
-  
-  private static int getColonOffset(final Text col)
-  throws InvalidColumnNameException {
-    int offset = -1;
-    for (int i = 0; i < col.getLength(); i++) {
-      if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
-        offset = i;
-        break;
-      }
-    }
-    if(offset < 0) {
-      throw new InvalidColumnNameException(col + " is missing the colon " +
-        "family/qualifier separator");
-    }
-    return offset;
-  }
-  
-  /**
-   * Returns row and column bytes out of an HStoreKey.
-   * @param hsk Store key.
-   * @return byte array encoding of HStoreKey
-   * @throws UnsupportedEncodingException
-   */
-  public static byte[] getBytes(final HStoreKey hsk)
-  throws UnsupportedEncodingException {
-    StringBuilder s = new StringBuilder(hsk.getRow().toString());
-    s.append(hsk.getColumn().toString());
-    return s.toString().getBytes(HConstants.UTF8_ENCODING);
-  }
-  
-  Text row;
-  Text column;
-  long timestamp;
+  private Text row;
+  private Text column;
+  private long timestamp;
 
   /** Default constructor used in conjunction with Writable interface */
@@ -163,6 +84,7 @@ public class HStoreKey implements WritableComparable {
    * @param timestamp timestamp value
    */
   public HStoreKey(Text row, Text column, long timestamp) {
+    // Make copies by doing 'new Text(arg)'.
     this.row = new Text(row);
     this.column = new Text(column);
     this.timestamp = timestamp;
@@ -339,4 +261,90 @@
     column.readFields(in);
     timestamp = in.readLong();
   }
+
+  // Statics
+  // TODO: Move these utility methods elsewhere (To a Column class?).
+
+  /**
+   * Extracts the column family name from a column
+   * For example, returns 'info' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column family as a TextSequence based on the passed
+   * col.  If col is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractFamily(final Text col)
+  throws InvalidColumnNameException {
+    return extractFamily(col, false);
+  }
+
+  /**
+   * Extracts the column family name from a column
+   * For example, returns 'info' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column family as a TextSequence based on the passed
+   * col.  If col is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractFamily(final Text col,
+    final boolean withColon)
+  throws InvalidColumnNameException {
+    int offset = getColonOffset(col);
+    // Include ':' in copy?
+    offset += (withColon)? 1: 0;
+    if (offset == col.getLength()) {
+      return new TextSequence(col);
+    }
+    return new TextSequence(col, 0, offset);
+  }
+
+  /**
+   * Extracts the column qualifier, the portion that follows the colon (':')
+   * family/qualifier separator.
+   * For example, returns 'server' if the specified column was 'info:server'
+   * @param col name of column
+   * @return column qualifier as a TextSequence based on the passed
+   * col.  If col is reused, make a new Text of
+   * the result by calling {@link TextSequence#toText()}.
+   * @throws InvalidColumnNameException
+   */
+  public static TextSequence extractQualifier(final Text col)
+  throws InvalidColumnNameException {
+    int offset = getColonOffset(col);
+    if (offset + 1 == col.getLength()) {
+      return null;
+    }
+    return new TextSequence(col, offset + 1);
+  }
+
+  private static int getColonOffset(final Text col)
+  throws InvalidColumnNameException {
+    int offset = -1;
+    for (int i = 0; i < col.getLength(); i++) {
+      if (col.charAt(i) == COLUMN_FAMILY_DELIMITER) {
+        offset = i;
+        break;
+      }
+    }
+    if(offset < 0) {
+      throw new InvalidColumnNameException(col + " is missing the colon " +
+        "family/qualifier separator");
+    }
+    return offset;
+  }
+
+  /**
+   * Returns row and column bytes out of an HStoreKey.
+   * @param hsk Store key.
+   * @return byte array encoding of HStoreKey
+   * @throws UnsupportedEncodingException
+   */
+  public static byte[] getBytes(final HStoreKey hsk)
+  throws UnsupportedEncodingException {
+    StringBuilder s = new StringBuilder(hsk.getRow().toString());
+    s.append(hsk.getColumn().toString());
+    return s.toString().getBytes(HConstants.UTF8_ENCODING);
+  }
 }
\ No newline at end of file
diff --git a/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java b/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
index 85de343f500..28676db7add 100644
--- a/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ b/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase;
 
 import java.io.IOException;
 import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
@@ -472,25 +471,28 @@ public class PerformanceEvaluation implements HConstants {
     }
   }
 
+  /*
+   * Format passed integer.
+   * This method takes some time and is done inline while uploading data.
+   * For example, doing the mapfile test, generation of the key and value
+   * consumes about 30% of CPU time.
+   * @param i
+   * @return Integer as String zero padded.
+   */
   static Text format(final int i) {
     return new Text(String.format("%010d", Integer.valueOf(i)));
   }
 
   /*
+   * This method takes some time and is done inline while uploading data.
+   * For example, doing the mapfile test, generation of the key and value
+   * consumes about 30% of CPU time.
    * @return Generated random value to insert into a table cell.
   */
  static byte[] generateValue(final Random r) {
-    StringBuilder val = new StringBuilder();
-    while(val.length() < ROW_LENGTH) {
-      val.append(Long.toString(r.nextLong()));
-    }
-    byte[] value = null;
-    try {
-      value = val.toString().getBytes(HConstants.UTF8_ENCODING);
-    } catch (UnsupportedEncodingException e) {
-      assert(false);
-    }
-    return value;
+    byte [] b = new byte [ROW_LENGTH];
+    r.nextBytes(b);
+    return b;
  }
 
  static Text getRandomRow(final Random random, final int totalRows) {
@@ -556,7 +558,7 @@ public class PerformanceEvaluation implements HConstants {
     Random random = new Random();
     Configuration c = new Configuration();
     FileSystem fs = FileSystem.get(c);
-    Path mf = new Path("performanceevaluation.mapfile");
+    Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
     if (fs.exists(mf)) {
       fs.delete(mf);
     }
@@ -571,7 +573,9 @@
     }
     writer.close();
     LOG.info("Writing " + ROW_COUNT + " records took " +
-      (System.currentTimeMillis() - startTime) + "ms");
+      (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " +
+      "and values is done inline and has been seen to consume " +
+      "significant time: e.g. ~30% of cpu time)");
     // Do random reads.
     LOG.info("Reading " + ROW_COUNT + " random rows");
     MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c);
@@ -585,7 +589,9 @@
     }
     reader.close();
     LOG.info("Reading " + ROW_COUNT + " random records took " +
-      (System.currentTimeMillis() - startTime) + "ms");
+      (System.currentTimeMillis() - startTime) + "ms (Note: generation of " +
+      "random key is done inline and takes a significant amount of cpu " +
+      "time: e.g. 10-15%)");
     // Do random reads.
     LOG.info("Reading " + ROW_COUNT + " rows sequentially");
     reader = new MapFile.Reader(fs, mf.toString(), c);
@@ -599,7 +605,7 @@
     LOG.info("Reading " + ROW_COUNT + " records serially took " +
       (System.currentTimeMillis() - startTime) + "ms");
   }
-  
+
   private void runTest(final String cmd) throws IOException {
     if (cmd.equals(RANDOM_READ_MEM)) {
       // For this one test, so all fits in memory, make R smaller (See
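
A note on TextSequence: the class this patch leans on, org.apache.hadoop.hbase.io.TextSequence, is imported and called above but is not itself part of this diff. The idea it implies is that extractFamily()/extractQualifier() hand back a lightweight view over the passed Text's existing bytes instead of copying them into a fresh Text on every call; call sites that only compare (the HStore flush loop, ColumnMatcher.matches()) allocate nothing, while call sites that keep the result (HRegion, the HStore constructor, ColumnMatcher's family field) materialize a copy once via toText(). The sketch below is only an illustration of that idea, not the real class: the name TextSequenceSketch, the byteEquals()/getLength() helpers, and the assumption that the three-argument constructor takes an exclusive end offset rather than a length are all invented for the example; only the (Text), (Text, int) and (Text, int, int) constructors and toText() are taken from the call sites in the patch.

// Illustrative sketch only -- NOT the real org.apache.hadoop.hbase.io.TextSequence.
// A view over an existing Text's byte array; bytes are copied only when the
// caller asks for a standalone Text via toText().
import org.apache.hadoop.io.Text;

public class TextSequenceSketch {
  private final Text delegate; // the Text this is a window over; never copied here
  private final int start;     // index of the first byte of the view
  private final int end;       // one past the last byte of the view (assumed meaning)

  /** View over all of the passed Text. */
  public TextSequenceSketch(final Text d) {
    this(d, 0, d.getLength());
  }

  /** View from start to the end of the passed Text. */
  public TextSequenceSketch(final Text d, final int start) {
    this(d, start, d.getLength());
  }

  /** View over [start, end) of the passed Text. */
  public TextSequenceSketch(final Text d, final int start, final int end) {
    this.delegate = d;
    this.start = start;
    this.end = end;
  }

  public int getLength() {
    return this.end - this.start;
  }

  /**
   * Byte-for-byte comparison against a Text with no allocation on either side.
   * The real class would plug this into equals()/compareTo(); it is a plain
   * method here to keep the sketch free of equals()/hashCode() boilerplate.
   */
  public boolean byteEquals(final Text other) {
    if (other.getLength() != getLength()) {
      return false;
    }
    byte [] viewed = this.delegate.getBytes();
    byte [] them = other.getBytes();
    for (int i = 0; i < getLength(); i++) {
      if (viewed[this.start + i] != them[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Materialize a standalone copy.  Needed when the caller holds on to the
   * result (e.g. as a TreeMap key) while the underlying Text gets reused.
   */
  public Text toText() {
    Text t = new Text();
    t.set(this.delegate.getBytes(), this.start, getLength());
    return t;
  }
}

Read against the HStore hunk at -939,8 above: extractFamily(curkey.getColumn()) now yields such a view, so comparing the family of every cached cell during a flush no longer creates a Text per entry, and only the few places that cache the family name pay for a copy via toText().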