From bccf1dc26f0af20b0ec70d420990e7fd9e01c4d7 Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Fri, 7 Dec 2007 19:49:19 +0000
Subject: [PATCH] HADOOP-2377 Holding open MapFile.Readers is expensive, so use less of them

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@602199 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |  1 +
 conf/hbase-default.xml                        |  7 +++---
 .../org/apache/hadoop/hbase/HConstants.java   |  2 +-
 src/java/org/apache/hadoop/hbase/HRegion.java |  4 ++--
 .../org/apache/hadoop/hbase/HStoreFile.java   | 19 +++++++++------
 .../hadoop/hbase/PerformanceEvaluation.java   | 24 +------------------
 6 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 67f4e91bc75..bcde2c89d76 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -94,6 +94,7 @@ Trunk (unreleased changes)
    HADOOP-2299 Support inclusive scans (Bryan Duxbury via Stack)
    HADOOP-2333 Client side retries happen at the wrong level
    HADOOP-2357 Compaction cleanup; less deleting + prevent possible file leaks
+   HADOOP-2377 Holding open MapFile.Readers is expensive, so use less of them
 
 Release 0.15.1
 
diff --git a/conf/hbase-default.xml b/conf/hbase-default.xml
index d5592367b40..b8ee3581180 100644
--- a/conf/hbase-default.xml
+++ b/conf/hbase-default.xml
@@ -153,7 +153,7 @@
   <property>
     <name>hbase.hregion.memcache.flush.size</name>
-    <value>16777216</value>
+    <value>67108864</value>
     <description>
     A HRegion memcache will be flushed to disk if size of the memcache
     exceeds this number of bytes.  Value is checked by a thread that runs
@@ -174,11 +174,10 @@
   <property>
     <name>hbase.hregion.max.filesize</name>
-    <value>67108864</value>
+    <value>268435456</value>
     <description>
     Maximum desired file size for an HRegion.  If filesize exceeds
-    value + (value / 2), the HRegion is split in two.  Default: 64M.
-    If too large, splits will take so long, clients timeout.
+    value + (value / 2), the HRegion is split in two.  Default: 256M.
     </description>
   </property>
diff --git a/src/java/org/apache/hadoop/hbase/HConstants.java b/src/java/org/apache/hadoop/hbase/HConstants.java
index ce1399ca0d2..f409252a875 100644
--- a/src/java/org/apache/hadoop/hbase/HConstants.java
+++ b/src/java/org/apache/hadoop/hbase/HConstants.java
@@ -88,7 +88,7 @@ public interface HConstants {
   static final String HREGION_OLDLOGFILE_NAME = "oldlogfile.log";
 
   /** Default maximum file size */
-  static final long DEFAULT_MAX_FILE_SIZE = 64 * 1024 * 1024;   // 64MB
+  static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
 
   // Always store the location of the root table's HRegion.
   // This HRegion is never split.
diff --git a/src/java/org/apache/hadoop/hbase/HRegion.java b/src/java/org/apache/hadoop/hbase/HRegion.java
index 62c1f9babed..a483b570ec6 100644
--- a/src/java/org/apache/hadoop/hbase/HRegion.java
+++ b/src/java/org/apache/hadoop/hbase/HRegion.java
@@ -310,9 +310,9 @@ public class HRegion implements HConstants {
       fs.delete(merges);
     }
 
-    // By default, we flush the cache when 16M.
+    // By default, we flush the cache when 64M.
     this.memcacheFlushSize = conf.getInt("hbase.hregion.memcache.flush.size",
-      1024*1024*16);
+      1024*1024*64);
     this.flushListener = listener;
     this.blockingMemcacheSize = this.memcacheFlushSize *
       conf.getInt("hbase.hregion.memcache.block.multiplier", 2);
diff --git a/src/java/org/apache/hadoop/hbase/HStoreFile.java b/src/java/org/apache/hadoop/hbase/HStoreFile.java
index 323a2bdb972..137a611ce1b 100644
--- a/src/java/org/apache/hadoop/hbase/HStoreFile.java
+++ b/src/java/org/apache/hadoop/hbase/HStoreFile.java
@@ -504,14 +504,19 @@ public class HStoreFile implements HConstants, WritableComparable {
    */
   static Reference readSplitInfo(final Path p, final FileSystem fs)
   throws IOException {
+    Reference r = null;
     FSDataInputStream in = fs.open(p);
-    String rn = in.readUTF();
-    HStoreKey midkey = new HStoreKey();
-    midkey.readFields(in);
-    long fid = in.readLong();
-    boolean tmp = in.readBoolean();
-    return new Reference(rn, fid, midkey, tmp? Range.top: Range.bottom);
-
+    try {
+      String rn = in.readUTF();
+      HStoreKey midkey = new HStoreKey();
+      midkey.readFields(in);
+      long fid = in.readLong();
+      boolean tmp = in.readBoolean();
+      r = new Reference(rn, fid, midkey, tmp? Range.top: Range.bottom);
+    } finally {
+      in.close();
+    }
+    return r;
   }
 
   private void createOrFail(final FileSystem fs, final Path p)
diff --git a/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java b/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
index 71fce068f9b..90c4afa8731 100644
--- a/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ b/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -64,15 +64,6 @@ import org.apache.log4j.Logger;
  *
  * <br>If number of clients > 1, we start up a MapReduce job. Each map task
  * runs an individual client.  Each client does about 1GB of data.
- *
- * <br>If client == 1, the test table is created and deleted at end of each run
- * and the sequentialWrite test is run first if a test requires
- * a populated test table: e.g. if you are running the
- * sequentialRead test, the test table must hold data for it to
- * read.  If client > 1, and we are running clients in a map task, the table
- * is not deleted at the end-of-run. Also, if running the
- * sequentialRead or randomRead tests, the
- * sequentialWrite test is not automatically run first.
  */
 public class PerformanceEvaluation implements HConstants {
   static final Logger LOG =
@@ -553,23 +544,10 @@ public class PerformanceEvaluation implements HConstants {
     try {
       admin = new HBaseAdmin(this.conf);
       checkTable(admin);
-
-      if (cmd.equals(RANDOM_READ) || cmd.equals(RANDOM_READ_MEM) ||
-          cmd.equals(SCAN) || cmd.equals(SEQUENTIAL_READ)) {
-        status.setStatus("Running " + SEQUENTIAL_WRITE + " first so " +
-          cmd + " has data to work against");
-        runOneClient(SEQUENTIAL_WRITE, 0, this.R, this.R, status);
-      }
-
       runOneClient(cmd, 0, this.R, this.R, status);
     } catch (Exception e) {
       LOG.error("Failed", e);
-    } finally {
-      LOG.info("Deleting table " + tableDescriptor.getName());
-      if (admin != null) {
-        admin.deleteTable(tableDescriptor.getName());
-      }
-    }
+    }
   }
 
   private void runTest(final String cmd) throws IOException {
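Note on the new defaults: the memcache now flushes at 64M instead of 16M, updates block once the memcache reaches flush size times hbase.hregion.memcache.block.multiplier (2 by default), and per the hbase-default.xml description a region splits once a store file exceeds value + (value / 2) of hbase.hregion.max.filesize, now 256M. Fewer flushes and later splits mean fewer store files per region, and therefore fewer open MapFile.Readers. A minimal sketch of that arithmetic, using only the values visible in the patch (the class name is invented for illustration):

public class Hadoop2377Defaults {
  public static void main(String[] args) {
    // hbase.hregion.memcache.flush.size: raised from 16M to 64M by this patch.
    long memcacheFlushSize = 1024L * 1024 * 64;         // 67108864 bytes
    // Updates block when the memcache reaches flush size * multiplier (2).
    long blockingMemcacheSize = memcacheFlushSize * 2;  // 134217728 bytes (128M)
    // hbase.hregion.max.filesize: raised from 64M to 256M by this patch.
    long maxFileSize = 256L * 1024 * 1024;              // 268435456 bytes
    // Split trigger per hbase-default.xml: value + (value / 2), i.e. 1.5x.
    long splitSize = maxFileSize + maxFileSize / 2;     // 402653184 bytes (384M)
    System.out.println("flush at " + memcacheFlushSize
        + ", block at " + blockingMemcacheSize
        + ", split past " + splitSize + " bytes");
  }
}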
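The HStoreFile.readSplitInfo() change is a leak fix as much as a cleanup: the old body returned straight from the reads, so an exception mid-read left the FSDataInputStream open; the new body closes the stream in a finally block. Below is a self-contained sketch of the same close-in-finally pattern over plain java.io streams. The Reference stand-in and the sample values are invented for illustration, and the HStoreKey midkey read from the real method is omitted:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ReadSplitInfoSketch {
  // Simplified stand-in for HBase's Reference: region name, file id, split half.
  static class Reference {
    final String regionName;
    final long fileId;
    final boolean top;
    Reference(String regionName, long fileId, boolean top) {
      this.regionName = regionName;
      this.fileId = fileId;
      this.top = top;
    }
  }

  static Reference readSplitInfo(byte[] raw) throws IOException {
    Reference r = null;
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(raw));
    try {
      String rn = in.readUTF();        // region name
      long fid = in.readLong();        // store file id
      boolean top = in.readBoolean();  // top or bottom half of the split
      r = new Reference(rn, fid, top);
    } finally {
      in.close();  // runs even when a read throws, so the handle never leaks
    }
    return r;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buf);
    out.writeUTF("testregion");  // invented sample values
    out.writeLong(42L);
    out.writeBoolean(true);
    out.close();
    Reference ref = readSplitInfo(buf.toByteArray());
    System.out.println(ref.regionName + "/" + ref.fileId + " top=" + ref.top);
  }
}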