HADOOP-2377 Holding open MapFile.Readers is expensive, so use less of them

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@602199 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2007-12-07 19:49:19 +00:00
parent 11df017a67
commit bccf1dc26f
6 changed files with 20 additions and 37 deletions
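In short: the default memcache flush size quadruples from 16MB to 64MB and the default maximum store file size quadruples from 64MB to 256MB, so each region accumulates fewer, larger MapFiles and the region server holds fewer open MapFile.Readers. The commit also closes the split-info stream in HStoreFile.readSplitInfo in a finally block, and stops PerformanceEvaluation from implicitly running sequentialWrite and deleting the test table.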

CHANGES.txt

@@ -94,6 +94,7 @@ Trunk (unreleased changes)
    HADOOP-2299 Support inclusive scans (Bryan Duxbury via Stack)
    HADOOP-2333 Client side retries happen at the wrong level
    HADOOP-2357 Compaction cleanup; less deleting + prevent possible file leaks
+   HADOOP-2377 Holding open MapFile.Readers is expensive, so use less of them
 
 Release 0.15.1

hbase-default.xml

@@ -153,7 +153,7 @@
   </property>
   <property>
     <name>hbase.hregion.memcache.flush.size</name>
-    <value>16777216</value>
+    <value>67108864</value>
     <description>
     A HRegion memcache will be flushed to disk if size of the memcache
     exceeds this number of bytes. Value is checked by a thread that runs
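Deployments that want a different flush threshold would override it in hbase-site.xml, which takes precedence over hbase-default.xml. A minimal sketch, using the new default value for illustration:

<!-- Illustrative hbase-site.xml override; 67108864 bytes = 64MB, the new default -->
<property>
  <name>hbase.hregion.memcache.flush.size</name>
  <value>67108864</value>
</property>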
@@ -174,11 +174,10 @@
   </property>
   <property>
     <name>hbase.hregion.max.filesize</name>
-    <value>67108864</value>
+    <value>268435456</value>
     <description>
     Maximum desired file size for an HRegion. If filesize exceeds
-    value + (value / 2), the HRegion is split in two. Default: 64M.
-    If too large, splits will take so long, clients timeout.
+    value + (value / 2), the HRegion is split in two. Default: 256M.
     </description>
   </property>
   <property>
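Worked out with the new default: a region splits once its largest file exceeds 268435456 + 268435456 / 2 = 402653184 bytes (384MB); under the old 67108864-byte default the trigger point was 100663296 bytes (96MB).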

HConstants.java

@@ -88,7 +88,7 @@ public interface HConstants {
   static final String HREGION_OLDLOGFILE_NAME = "oldlogfile.log";
 
   /** Default maximum file size */
-  static final long DEFAULT_MAX_FILE_SIZE = 64 * 1024 * 1024; // 64MB
+  static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
 
   // Always store the location of the root table's HRegion.
   // This HRegion is never split.

HRegion.java

@@ -310,9 +310,9 @@ public class HRegion implements HConstants {
       fs.delete(merges);
     }
-    // By default, we flush the cache when 16M.
+    // By default, we flush the cache when 64M.
     this.memcacheFlushSize = conf.getInt("hbase.hregion.memcache.flush.size",
-      1024*1024*16);
+      1024*1024*64);
     this.flushListener = listener;
     this.blockingMemcacheSize = this.memcacheFlushSize *
       conf.getInt("hbase.hregion.memcache.block.multiplier", 2);
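With these defaults the blocking threshold works out to 67108864 * 2 = 134217728 bytes, so a region now blocks updates once its memcache reaches 128MB, up from 32MB under the old 16MB flush size.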

HStoreFile.java

@@ -504,14 +504,19 @@ public class HStoreFile implements HConstants, WritableComparable {
    */
   static Reference readSplitInfo(final Path p, final FileSystem fs)
   throws IOException {
+    Reference r = null;
     FSDataInputStream in = fs.open(p);
-    String rn = in.readUTF();
-    HStoreKey midkey = new HStoreKey();
-    midkey.readFields(in);
-    long fid = in.readLong();
-    boolean tmp = in.readBoolean();
-    return new Reference(rn, fid, midkey, tmp? Range.top: Range.bottom);
+    try {
+      String rn = in.readUTF();
+      HStoreKey midkey = new HStoreKey();
+      midkey.readFields(in);
+      long fid = in.readLong();
+      boolean tmp = in.readBoolean();
+      r = new Reference(rn, fid, midkey, tmp? Range.top: Range.bottom);
+    } finally {
+      in.close();
+    }
+    return r;
   }
 
   private void createOrFail(final FileSystem fs, final Path p)
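For context, any writer of split info has to emit the Reference fields in exactly the order readSplitInfo consumes them. The sketch below is illustrative only: the method name, signature, and parameter names are assumptions, not part of this commit, and it presumes org.apache.hadoop.fs.FSDataOutputStream is imported. It follows the same close-in-finally discipline the commit introduces on the read side.

  // Hypothetical writer matching readSplitInfo's field order; names assumed.
  static void writeSplitInfo(final Path p, final FileSystem fs,
      final String regionName, final long fileId, final HStoreKey midkey,
      final boolean isTopRange) throws IOException {
    FSDataOutputStream out = fs.create(p);
    try {
      out.writeUTF(regionName);      // read back by in.readUTF()
      midkey.write(out);             // read back by midkey.readFields(in)
      out.writeLong(fileId);         // read back by in.readLong()
      out.writeBoolean(isTopRange);  // read back by in.readBoolean()
    } finally {
      out.close();
    }
  }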

PerformanceEvaluation.java

@@ -64,15 +64,6 @@ import org.apache.log4j.Logger;
  *
  * <p>If number of clients > 1, we start up a MapReduce job. Each map task
  * runs an individual client. Each client does about 1GB of data.
- *
- * <p>If client == 1, the test table is created and deleted at end of each run
- * and the <code>sequentialWrite</code> test is run first if a test requires
- * a populated test table: e.g. if you are running the
- * <code>sequentialRead</code> test, the test table must hold data for it to
- * read. If client > 1, and we are running clients in a map task, the table
- * is not deleted at the end-of-run. Also, if running the
- * <code>sequentialRead</code> or </code>randomRead</code> tests, the
- * <code>sequentialWrite</code> test is not automatically run first.
  */
 public class PerformanceEvaluation implements HConstants {
   static final Logger LOG =
@@ -553,22 +544,9 @@ public class PerformanceEvaluation implements HConstants {
     try {
       admin = new HBaseAdmin(this.conf);
       checkTable(admin);
-      if (cmd.equals(RANDOM_READ) || cmd.equals(RANDOM_READ_MEM) ||
-          cmd.equals(SCAN) || cmd.equals(SEQUENTIAL_READ)) {
-        status.setStatus("Running " + SEQUENTIAL_WRITE + " first so " +
-          cmd + " has data to work against");
-        runOneClient(SEQUENTIAL_WRITE, 0, this.R, this.R, status);
-      }
       runOneClient(cmd, 0, this.R, this.R, status);
     } catch (Exception e) {
       LOG.error("Failed", e);
-    } finally {
-      LOG.info("Deleting table " + tableDescriptor.getName());
-      if (admin != null) {
-        admin.deleteTable(tableDescriptor.getName());
-      }
     }
   }
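Behavioral note: after this change a single-client randomRead, randomReadMem, scan, or sequentialRead run no longer gets an implicit sequentialWrite pass, and the test table is no longer deleted in a finally block. Callers must now populate the table themselves, for example by running the sequentialWrite command first, and drop it when they are done.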