From 11df017a67404c8ea53c41861b298a91b53933b0 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Fri, 7 Dec 2007 02:34:08 +0000 Subject: [PATCH] HADOOP-2362 Leaking hdfs file handle on region split git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@601961 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../apache/hadoop/hbase/HAbstractScanner.java | 5 - src/java/org/apache/hadoop/hbase/HMaster.java | 6 +- src/java/org/apache/hadoop/hbase/HMsg.java | 7 +- src/java/org/apache/hadoop/hbase/HRegion.java | 7 +- .../apache/hadoop/hbase/HRegionServer.java | 3 - .../apache/hadoop/hbase/HServerAddress.java | 4 +- .../org/apache/hadoop/hbase/HServerInfo.java | 9 +- src/java/org/apache/hadoop/hbase/HStore.java | 15 +- .../org/apache/hadoop/hbase/HStoreFile.java | 15 +- .../org/apache/hadoop/hbase/HStoreKey.java | 3 +- .../org/apache/hadoop/hbase/TestScanner2.java | 4 +- .../org/apache/hadoop/hbase/TestSplit.java | 139 +++++++++++------- 13 files changed, 119 insertions(+), 99 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 1101e32a006..67f4e91bc75 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -59,6 +59,7 @@ Trunk (unreleased changes) HADOOP-2347 REST servlet not thread safe but run in a threaded manner (Bryan Duxbury via Stack) HADOOP-2365 Result of HashFunction.hash() contains all identical values + HADOOP-2362 Leaking hdfs file handle on region split IMPROVEMENTS HADOOP-2401 Add convenience put method that takes writable diff --git a/src/java/org/apache/hadoop/hbase/HAbstractScanner.java b/src/java/org/apache/hadoop/hbase/HAbstractScanner.java index f63136ff8fc..952f4b3b346 100644 --- a/src/java/org/apache/hadoop/hbase/HAbstractScanner.java +++ b/src/java/org/apache/hadoop/hbase/HAbstractScanner.java @@ -29,8 +29,6 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.io.DataInputBuffer; -import 
org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; /** @@ -124,9 +122,6 @@ public abstract class HAbstractScanner implements HInternalScannerInterface { protected long timestamp; // The timestamp to match entries against private boolean wildcardMatch; private boolean multipleMatchers; - - protected DataOutputBuffer outbuf = new DataOutputBuffer(); - protected DataInputBuffer inbuf = new DataInputBuffer(); /** Constructor for abstract base class */ HAbstractScanner(long timestamp, Text[] targetCols) throws IOException { diff --git a/src/java/org/apache/hadoop/hbase/HMaster.java b/src/java/org/apache/hadoop/hbase/HMaster.java index b13668945af..3c2a9053e81 100644 --- a/src/java/org/apache/hadoop/hbase/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/HMaster.java @@ -908,9 +908,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, LOG.info("bootstrap: creating ROOT and first META regions"); try { HRegion root = HRegion.createHRegion(HRegionInfo.rootRegionInfo, - this.dir, this.conf, null); + this.dir, this.conf); HRegion meta = HRegion.createHRegion(HRegionInfo.firstMetaRegionInfo, - this.dir, this.conf, null); + this.dir, this.conf); // Add first region from the META table to the ROOT region. HRegion.addRegionToMETA(root, meta); @@ -2545,7 +2545,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, // 2. Create the HRegion HRegion region = - HRegion.createHRegion(newRegion, this.dir, this.conf, null); + HRegion.createHRegion(newRegion, this.dir, this.conf); // 3. 
Insert into meta diff --git a/src/java/org/apache/hadoop/hbase/HMsg.java b/src/java/org/apache/hadoop/hbase/HMsg.java index 488ff8f5ef9..49ab2ee934d 100644 --- a/src/java/org/apache/hadoop/hbase/HMsg.java +++ b/src/java/org/apache/hadoop/hbase/HMsg.java @@ -19,9 +19,12 @@ */ package org.apache.hadoop.hbase; -import org.apache.hadoop.io.*; -import java.io.*; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; /******************************************************************************* * HMsg is for communicating instructions between the HMaster and the diff --git a/src/java/org/apache/hadoop/hbase/HRegion.java b/src/java/org/apache/hadoop/hbase/HRegion.java index be3a2da12a8..62c1f9babed 100644 --- a/src/java/org/apache/hadoop/hbase/HRegion.java +++ b/src/java/org/apache/hadoop/hbase/HRegion.java @@ -608,8 +608,10 @@ public class HRegion implements HConstants { // under each region. HRegion regionA = new HRegion(rootDir, log, fs, conf, regionAInfo, dirA, null); + regionA.close(); HRegion regionB = new HRegion(rootDir, log, fs, conf, regionBInfo, dirB, null); + regionB.close(); // Cleanup boolean deleted = fs.delete(splits); // Get rid of splits directory @@ -1581,13 +1583,12 @@ public class HRegion implements HConstants { * @param info Info for region to create. * @param rootDir Root directory for HBase instance * @param conf - * @param initialFiles InitialFiles to pass new HRegion. Pass null if none. 
* @return new HRegion * * @throws IOException */ static HRegion createHRegion(final HRegionInfo info, final Path rootDir, - final HBaseConfiguration conf, final Path initialFiles) + final HBaseConfiguration conf) throws IOException { Path regionDir = HRegion.getRegionDir(rootDir, HRegionInfo.encodeRegionName(info.getRegionName())); @@ -1595,7 +1596,7 @@ public class HRegion implements HConstants { fs.mkdirs(regionDir); return new HRegion(rootDir, new HLog(fs, new Path(regionDir, HREGION_LOGDIR_NAME), conf, null), - fs, conf, info, initialFiles, null); + fs, conf, info, null, null); } /** diff --git a/src/java/org/apache/hadoop/hbase/HRegionServer.java b/src/java/org/apache/hadoop/hbase/HRegionServer.java index 53e0c092c91..c61c678b196 100644 --- a/src/java/org/apache/hadoop/hbase/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/HRegionServer.java @@ -445,9 +445,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { synchronized(cacheFlusherLock) { // Don't interrupt while we're working if (e != null) { try { - if (LOG.isDebugEnabled()) { - LOG.debug("flushing region " + e.getRegion().getRegionName()); - } if (e.getRegion().flushcache()) { compactor.compactionRequested(e); } diff --git a/src/java/org/apache/hadoop/hbase/HServerAddress.java b/src/java/org/apache/hadoop/hbase/HServerAddress.java index bb6226042e4..8b65b76e36d 100644 --- a/src/java/org/apache/hadoop/hbase/HServerAddress.java +++ b/src/java/org/apache/hadoop/hbase/HServerAddress.java @@ -21,7 +21,9 @@ package org.apache.hadoop.hbase; import org.apache.hadoop.io.*; -import java.io.*; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.net.InetSocketAddress; /** diff --git a/src/java/org/apache/hadoop/hbase/HServerInfo.java b/src/java/org/apache/hadoop/hbase/HServerInfo.java index 74e06524fa9..757decbd8f1 100644 --- a/src/java/org/apache/hadoop/hbase/HServerInfo.java +++ b/src/java/org/apache/hadoop/hbase/HServerInfo.java @@ 
-19,9 +19,12 @@ */ package org.apache.hadoop.hbase; -import org.apache.hadoop.io.*; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; -import java.io.*; /** * HServerInfo contains metainfo about an HRegionServer, Currently it only @@ -139,7 +142,6 @@ public class HServerInfo implements Writable { // Writable - /** {@inheritDoc} */ public void readFields(DataInput in) throws IOException { this.serverAddress.readFields(in); this.startCode = in.readLong(); @@ -147,7 +149,6 @@ public class HServerInfo implements Writable { this.infoPort = in.readInt(); } - /** {@inheritDoc} */ public void write(DataOutput out) throws IOException { this.serverAddress.write(out); out.writeLong(this.startCode); diff --git a/src/java/org/apache/hadoop/hbase/HStore.java b/src/java/org/apache/hadoop/hbase/HStore.java index af3981d3345..86279e0d6d4 100644 --- a/src/java/org/apache/hadoop/hbase/HStore.java +++ b/src/java/org/apache/hadoop/hbase/HStore.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hbase; import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -760,9 +759,11 @@ class HStore implements HConstants { bloomFilter = new RetouchedBloomFilter(); } FSDataInputStream in = fs.open(filterFile); - bloomFilter.readFields(in); - fs.close(); - + try { + bloomFilter.readFields(in); + } finally { + in.close(); + } } else { if (LOG.isDebugEnabled()) { LOG.debug("creating bloom filter for " + this.storeName); @@ -913,7 +914,6 @@ class HStore implements HConstants { HStoreKey curkey = es.getKey(); if (this.familyName.equals(HStoreKey.extractFamily( curkey.getColumn()))) { - out.append(curkey, new ImmutableBytesWritable(es.getValue())); } } @@ -1040,7 +1040,7 @@ class HStore implements HConstants { // Write out a list of data files that we're replacing Path filesToReplace = new Path(curCompactStore, 
COMPACTION_TO_REPLACE); - DataOutputStream out = new DataOutputStream(fs.create(filesToReplace)); + FSDataOutputStream out = fs.create(filesToReplace); try { out.writeInt(filesToCompact.size()); for (HStoreFile hsf : filesToCompact) { @@ -1052,7 +1052,7 @@ class HStore implements HConstants { // Indicate that we're done. Path doneFile = new Path(curCompactStore, COMPACTION_DONE); - (new DataOutputStream(fs.create(doneFile))).close(); + fs.create(doneFile).close(); // Move the compaction into place. completeCompaction(curCompactStore); @@ -2151,5 +2151,4 @@ class HStore implements HConstants { "next(HStoreKey, StortedMap(...) is more efficient"); } } - } diff --git a/src/java/org/apache/hadoop/hbase/HStoreFile.java b/src/java/org/apache/hadoop/hbase/HStoreFile.java index 175312be228..323a2bdb972 100644 --- a/src/java/org/apache/hadoop/hbase/HStoreFile.java +++ b/src/java/org/apache/hadoop/hbase/HStoreFile.java @@ -22,7 +22,6 @@ package org.apache.hadoop.hbase; import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutput; -import java.io.DataOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -351,17 +350,15 @@ public class HStoreFile implements HConstants, WritableComparable { static HStoreFile obtainNewHStoreFile(HBaseConfiguration conf, Path dir, String encodedRegionName, Text colFamily, FileSystem fs) throws IOException { - Path mapdir = HStoreFile.getMapDir(dir, encodedRegionName, colFamily); - long fileId = Math.abs(rand.nextLong()); - - Path testpath1 = new Path(mapdir, createHStoreFilename(fileId)); - Path testpath2 = new Path(mapdir, createHStoreInfoFilename(fileId)); - while(fs.exists(testpath1) || fs.exists(testpath2)) { + Path testpath1 = null; + Path testpath2 = null; + long fileId = -1; + do { fileId = Math.abs(rand.nextLong()); testpath1 = new Path(mapdir, createHStoreFilename(fileId)); testpath2 = new Path(mapdir, createHStoreInfoFilename(fileId)); - } + 
} while(fs.exists(testpath1) || fs.exists(testpath2)); return new HStoreFile(conf, dir, encodedRegionName, colFamily, fileId); } @@ -606,7 +603,7 @@ public class HStoreFile implements HConstants, WritableComparable { */ void writeInfo(FileSystem fs, long infonum) throws IOException { Path p = getInfoFilePath(); - DataOutputStream out = new DataOutputStream(fs.create(p)); + FSDataOutputStream out = fs.create(p); try { out.writeByte(INFO_SEQ_NUM); out.writeLong(infonum); diff --git a/src/java/org/apache/hadoop/hbase/HStoreKey.java b/src/java/org/apache/hadoop/hbase/HStoreKey.java index 452894ae1e8..91f3b4ea3fe 100644 --- a/src/java/org/apache/hadoop/hbase/HStoreKey.java +++ b/src/java/org/apache/hadoop/hbase/HStoreKey.java @@ -330,5 +330,4 @@ public class HStoreKey implements WritableComparable { column.readFields(in); timestamp = in.readLong(); } -} - +} \ No newline at end of file diff --git a/src/test/org/apache/hadoop/hbase/TestScanner2.java b/src/test/org/apache/hadoop/hbase/TestScanner2.java index 25786f57b64..c3ff3f5e44b 100644 --- a/src/test/org/apache/hadoop/hbase/TestScanner2.java +++ b/src/test/org/apache/hadoop/hbase/TestScanner2.java @@ -305,10 +305,10 @@ public class TestScanner2 extends HBaseClusterTestCase { List newRegions = new ArrayList(2); newRegions.add(HRegion.createHRegion( new HRegionInfo(desc, null, new Text("midway")), - homedir, this.conf, null)); + homedir, this.conf)); newRegions.add(HRegion.createHRegion( new HRegionInfo(desc, new Text("midway"), null), - homedir, this.conf, null)); + homedir, this.conf)); try { for (HRegion r : newRegions) { addRegionToMETA(metaTable, r, this.cluster.getHMasterAddress(), diff --git a/src/test/org/apache/hadoop/hbase/TestSplit.java b/src/test/org/apache/hadoop/hbase/TestSplit.java index 02c0fc439f0..165ab5cd776 100644 --- a/src/test/org/apache/hadoop/hbase/TestSplit.java +++ b/src/test/org/apache/hadoop/hbase/TestSplit.java @@ -85,67 +85,92 @@ public class TestSplit extends MultiRegionTable { Text midkey 
= new Text(); assertTrue(region.needsSplit(midkey)); HRegion [] regions = split(region); - // Assert can get rows out of new regions. Should be able to get first - // row from first region and the midkey from second region. - assertGet(regions[0], COLFAMILY_NAME3, new Text(START_KEY)); - assertGet(regions[1], COLFAMILY_NAME3, midkey); - // Test I can get scanner and that it starts at right place. - assertScan(regions[0], COLFAMILY_NAME3, new Text(START_KEY)); - assertScan(regions[1], COLFAMILY_NAME3, midkey); - // Now prove can't split regions that have references. - Text [] midkeys = new Text[regions.length]; - for (int i = 0; i < regions.length; i++) { - midkeys[i] = new Text(); - // Even after above splits, still needs split but after splits its - // unsplitable because biggest store file is reference. References - // make the store unsplittable, until something bigger comes along. - assertFalse(regions[i].needsSplit(midkeys[i])); - // Add so much data to this region, we create a store file that is > than - // one of our unsplitable references. - // it will. - for (int j = 0; j < 2; j++) { - addContent(regions[i], COLFAMILY_NAME3); + try { + // Need to open the regions. + // TODO: Add an 'open' to HRegion... don't do open by constructing + // instance. + for (int i = 0; i < regions.length; i++) { + regions[i] = openClosedRegion(regions[i]); + } + // Assert can get rows out of new regions. Should be able to get first + // row from first region and the midkey from second region. + assertGet(regions[0], COLFAMILY_NAME3, new Text(START_KEY)); + assertGet(regions[1], COLFAMILY_NAME3, midkey); + // Test I can get scanner and that it starts at right place. + assertScan(regions[0], COLFAMILY_NAME3, new Text(START_KEY)); + assertScan(regions[1], COLFAMILY_NAME3, midkey); + // Now prove can't split regions that have references. 
+ Text[] midkeys = new Text[regions.length]; + for (int i = 0; i < regions.length; i++) { + midkeys[i] = new Text(); + // Even after above splits, still needs split but after splits its + // unsplitable because biggest store file is reference. References + // make the store unsplittable, until something bigger comes along. + assertFalse(regions[i].needsSplit(midkeys[i])); + // Add so much data to this region, we create a store file that is > + // than + // one of our unsplitable references. + // it will. + for (int j = 0; j < 2; j++) { + addContent(regions[i], COLFAMILY_NAME3); + } + addContent(regions[i], COLFAMILY_NAME2); + addContent(regions[i], COLFAMILY_NAME1); + regions[i].flushcache(); } - addContent(regions[i], COLFAMILY_NAME2); - addContent(regions[i], COLFAMILY_NAME1); - regions[i].flushcache(); - } - - // Assert that even if one store file is larger than a reference, the - // region is still deemed unsplitable (Can't split region if references - // presen). - for (int i = 0; i < regions.length; i++) { - midkeys[i] = new Text(); - // Even after above splits, still needs split but after splits its - // unsplitable because biggest store file is reference. References - // make the store unsplittable, until something bigger comes along. - assertFalse(regions[i].needsSplit(midkeys[i])); - } - - // To make regions splitable force compaction. - for (int i = 0; i < regions.length; i++) { - regions[i].compactStores(); - } - TreeMap sortedMap = new TreeMap(); - // Split these two daughter regions so then I'll have 4 regions. Will - // split because added data above. - for (int i = 0; i < regions.length; i++) { - HRegion [] rs = split(regions[i]); - for (int j = 0; j < rs.length; j++) { - sortedMap.put(rs[j].getRegionName().toString(), rs[j]); + // Assert that even if one store file is larger than a reference, the + // region is still deemed unsplitable (Can't split region if references + // presen). 
+ for (int i = 0; i < regions.length; i++) { + midkeys[i] = new Text(); + // Even after above splits, still needs split but after splits its + // unsplitable because biggest store file is reference. References + // make the store unsplittable, until something bigger comes along. + assertFalse(regions[i].needsSplit(midkeys[i])); + } + + // To make regions splitable force compaction. + for (int i = 0; i < regions.length; i++) { + regions[i].compactStores(); + } + + TreeMap sortedMap = new TreeMap(); + // Split these two daughter regions so then I'll have 4 regions. Will + // split because added data above. + for (int i = 0; i < regions.length; i++) { + HRegion[] rs = split(regions[i]); + for (int j = 0; j < rs.length; j++) { + sortedMap.put(rs[j].getRegionName().toString(), + openClosedRegion(rs[j])); + } + } + LOG.info("Made 4 regions"); + // The splits should have been even. Test I can get some arbitrary row out + // of each. + int interval = (LAST_CHAR - FIRST_CHAR) / 3; + byte[] b = START_KEY.getBytes(HConstants.UTF8_ENCODING); + for (HRegion r : sortedMap.values()) { + assertGet(r, COLFAMILY_NAME3, new Text(new String(b, + HConstants.UTF8_ENCODING))); + b[0] += interval; + } + } finally { + for (int i = 0; i < regions.length; i++) { + try { + regions[i].close(); + } catch (IOException e) { + // Ignore. + } } } - LOG.info("Made 4 regions"); - // The splits should have been even. Test I can get some arbitrary row out - // of each. - int interval = (LAST_CHAR - FIRST_CHAR) / 3; - byte[] b = START_KEY.getBytes(HConstants.UTF8_ENCODING); - for (HRegion r: sortedMap.values()) { - assertGet(r, COLFAMILY_NAME3, - new Text(new String(b, HConstants.UTF8_ENCODING))); - b[0] += interval; - } + } + + private HRegion openClosedRegion(final HRegion closedRegion) + throws IOException { + return new HRegion(closedRegion.getRootDir(), closedRegion.getLog(), + closedRegion.getFilesystem(), closedRegion.getConf(), + closedRegion.getRegionInfo(), null, null); } /**