HBASE-16117 Fix Connection leak in mapred.TableOutputFormat
This commit is contained in:
parent
42106b89b1
commit
e1d130946b
|
@ -436,7 +436,7 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
|
||||||
|
|
||||||
protected String clusterId = null;
|
protected String clusterId = null;
|
||||||
|
|
||||||
protected void retrieveClusterId() {
|
protected void retrieveClusterId() throws IOException {
|
||||||
if (clusterId != null) {
|
if (clusterId != null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2077,29 +2077,24 @@ public class HBaseAdmin implements Admin {
|
||||||
// We set it to make it fail as soon as possible if HBase is not available
|
// We set it to make it fail as soon as possible if HBase is not available
|
||||||
copyOfConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
|
copyOfConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
|
||||||
copyOfConf.setInt("zookeeper.recovery.retry", 0);
|
copyOfConf.setInt("zookeeper.recovery.retry", 0);
|
||||||
|
|
||||||
|
// Check ZK first.
|
||||||
|
// If the connection exists, we may have a connection to ZK that does not work anymore
|
||||||
try (ClusterConnection connection =
|
try (ClusterConnection connection =
|
||||||
(ClusterConnection)ConnectionFactory.createConnection(copyOfConf)) {
|
(ClusterConnection) ConnectionFactory.createConnection(copyOfConf);
|
||||||
// Check ZK first.
|
ZooKeeperKeepAliveConnection zkw = ((ConnectionImplementation) connection).
|
||||||
// If the connection exists, we may have a connection to ZK that does not work anymore
|
getKeepAliveZooKeeperWatcher();) {
|
||||||
ZooKeeperKeepAliveConnection zkw = null;
|
|
||||||
try {
|
// This is NASTY. FIX!!!! Dependent on internal implementation! TODO
|
||||||
// This is NASTY. FIX!!!! Dependent on internal implementation! TODO
|
zkw.getRecoverableZooKeeper().getZooKeeper().exists(zkw.baseZNode, false);
|
||||||
zkw = ((ConnectionImplementation)connection).
|
|
||||||
getKeepAliveZooKeeperWatcher();
|
|
||||||
zkw.getRecoverableZooKeeper().getZooKeeper().exists(zkw.baseZNode, false);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw (InterruptedIOException)
|
|
||||||
new InterruptedIOException("Can't connect to ZooKeeper").initCause(e);
|
|
||||||
} catch (KeeperException e) {
|
|
||||||
throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e);
|
|
||||||
} finally {
|
|
||||||
if (zkw != null) {
|
|
||||||
zkw.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
connection.isMasterRunning();
|
connection.isMasterRunning();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw (InterruptedIOException)
|
||||||
|
new InterruptedIOException("Can't connect to ZooKeeper").initCause(e);
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ interface Registry {
|
||||||
/**
|
/**
|
||||||
* @param connection
|
* @param connection
|
||||||
*/
|
*/
|
||||||
void init(Connection connection);
|
void init(Connection connection) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Meta region location
|
* @return Meta region location
|
||||||
|
@ -43,7 +43,7 @@ interface Registry {
|
||||||
/**
|
/**
|
||||||
* @return Cluster id.
|
* @return Cluster id.
|
||||||
*/
|
*/
|
||||||
String getClusterId();
|
String getClusterId() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Count of 'running' regionservers
|
* @return Count of 'running' regionservers
|
||||||
|
|
|
@ -92,7 +92,7 @@ class ZooKeeperRegistry implements Registry {
|
||||||
private String clusterId = null;
|
private String clusterId = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getClusterId() {
|
public String getClusterId() throws IOException {
|
||||||
if (this.clusterId != null) return this.clusterId;
|
if (this.clusterId != null) return this.clusterId;
|
||||||
// No synchronized here, worse case we will retrieve it twice, that's
|
// No synchronized here, worse case we will retrieve it twice, that's
|
||||||
// not an issue.
|
// not an issue.
|
||||||
|
@ -105,8 +105,10 @@ class ZooKeeperRegistry implements Registry {
|
||||||
}
|
}
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.warn("Can't retrieve clusterId from ZooKeeper", e);
|
LOG.warn("Can't retrieve clusterId from ZooKeeper", e);
|
||||||
|
throw new IOException("ZooKeeperException ", e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Can't retrieve clusterId from ZooKeeper", e);
|
LOG.warn("Can't retrieve clusterId from ZooKeeper", e);
|
||||||
|
throw e;
|
||||||
} finally {
|
} finally {
|
||||||
if (zkw != null) zkw.close();
|
if (zkw != null) zkw.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,27 +53,31 @@ public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable,
|
||||||
*/
|
*/
|
||||||
protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
|
protected static class TableRecordWriter implements RecordWriter<ImmutableBytesWritable, Put> {
|
||||||
private BufferedMutator m_mutator;
|
private BufferedMutator m_mutator;
|
||||||
private Connection connection;
|
private Connection conn;
|
||||||
/**
|
|
||||||
* Instantiate a TableRecordWriter with the HBase HClient for writing. Assumes control over the
|
|
||||||
* lifecycle of {@code conn}.
|
|
||||||
*/
|
|
||||||
public TableRecordWriter(final BufferedMutator mutator) throws IOException {
|
|
||||||
this.m_mutator = mutator;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate a TableRecordWriter with a BufferedMutator for batch writing.
|
||||||
|
*/
|
||||||
public TableRecordWriter(JobConf job) throws IOException {
|
public TableRecordWriter(JobConf job) throws IOException {
|
||||||
// expecting exactly one path
|
// expecting exactly one path
|
||||||
TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
|
TableName tableName = TableName.valueOf(job.get(OUTPUT_TABLE));
|
||||||
connection = ConnectionFactory.createConnection(job);
|
try {
|
||||||
m_mutator = connection.getBufferedMutator(tableName);
|
this.conn = ConnectionFactory.createConnection(job);
|
||||||
|
this.m_mutator = conn.getBufferedMutator(tableName);
|
||||||
|
} finally {
|
||||||
|
if (this.m_mutator == null) {
|
||||||
|
conn.close();
|
||||||
|
conn = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close(Reporter reporter) throws IOException {
|
public void close(Reporter reporter) throws IOException {
|
||||||
this.m_mutator.close();
|
if (this.m_mutator != null) {
|
||||||
if (connection != null) {
|
this.m_mutator.close();
|
||||||
connection.close();
|
}
|
||||||
connection = null;
|
if (conn != null) {
|
||||||
|
this.conn.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,6 +105,7 @@ public class TableOutputFormat extends FileOutputFormat<ImmutableBytesWritable,
|
||||||
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
|
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name,
|
||||||
Progressable progress)
|
Progressable progress)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
// Clear write buffer on fail is true by default so no need to reset it.
|
||||||
return new TableRecordWriter(job);
|
return new TableRecordWriter(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.MasterNotRunningException;
|
import org.apache.hadoop.hbase.MasterNotRunningException;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||||
import org.apache.hadoop.hbase.ipc.AbstractRpcClient;
|
import org.apache.hadoop.hbase.ipc.AbstractRpcClient;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcClientFactory;
|
import org.apache.hadoop.hbase.ipc.RpcClientFactory;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcClientImpl;
|
import org.apache.hadoop.hbase.ipc.RpcClientImpl;
|
||||||
|
@ -106,9 +107,9 @@ public class TestClientTimeouts {
|
||||||
// run some admin commands
|
// run some admin commands
|
||||||
HBaseAdmin.checkHBaseAvailable(conf);
|
HBaseAdmin.checkHBaseAvailable(conf);
|
||||||
admin.setBalancerRunning(false, false);
|
admin.setBalancerRunning(false, false);
|
||||||
} catch (MasterNotRunningException ex) {
|
} catch (ZooKeeperConnectionException ex) {
|
||||||
// Since we are randomly throwing SocketTimeoutExceptions, it is possible to get
|
// Since we are randomly throwing SocketTimeoutExceptions, it is possible to get
|
||||||
// a MasterNotRunningException. It's a bug if we get other exceptions.
|
// a ZooKeeperConnectionException. It's a bug if we get other exceptions.
|
||||||
lastFailed = true;
|
lastFailed = true;
|
||||||
} finally {
|
} finally {
|
||||||
if(admin != null) {
|
if(admin != null) {
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.mapred;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.mapred.JobConf;
|
||||||
|
import org.apache.hadoop.mapred.RecordWriter;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spark creates many instances of TableOutputFormat within a single process. We need to make
|
||||||
|
* sure we can have many instances and not leak connections.
|
||||||
|
*
|
||||||
|
* This test creates a few TableOutputFormats and shouldn't fail due to ZK connection exhaustion.
|
||||||
|
*/
|
||||||
|
@Category(MediumTests.class)
|
||||||
|
public class TestTableOutputFormatConnectionExhaust {
|
||||||
|
|
||||||
|
private static final Log LOG =
|
||||||
|
LogFactory.getLog(TestTableOutputFormatConnectionExhaust.class);
|
||||||
|
|
||||||
|
private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||||
|
static final String TABLE = "TestTableOutputFormatConnectionExhaust";
|
||||||
|
static final String FAMILY = "family";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
// Default in ZookeeperMiniCluster is 1000, setting artificially low to trigger exhaustion.
|
||||||
|
// need min of 7 to properly start the default mini HBase cluster
|
||||||
|
UTIL.getConfiguration().setInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, 10);
|
||||||
|
UTIL.startMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void afterClass() throws Exception {
|
||||||
|
UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void before() throws IOException {
|
||||||
|
LOG.info("before");
|
||||||
|
UTIL.ensureSomeRegionServersAvailable(1);
|
||||||
|
LOG.info("before done");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open and close a TableOutputFormat. The closing the RecordWriter should release HBase
|
||||||
|
* Connection (ZK) resources, and will throw exception if they are exhausted.
|
||||||
|
*/
|
||||||
|
static void openCloseTableOutputFormat(int iter) throws IOException {
|
||||||
|
LOG.info("Instantiating TableOutputFormat connection " + iter);
|
||||||
|
JobConf conf = new JobConf();
|
||||||
|
conf.addResource(UTIL.getConfiguration());
|
||||||
|
conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
|
||||||
|
TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
|
||||||
|
ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
|
||||||
|
TableOutputFormat tof = new TableOutputFormat();
|
||||||
|
RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
|
||||||
|
rw.close(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConnectionExhaustion() throws IOException {
|
||||||
|
int MAX_INSTANCES = 5; // fails on iteration 3 if zk connections leak
|
||||||
|
for (int i = 0; i < MAX_INSTANCES; i++) {
|
||||||
|
final int iter = i;
|
||||||
|
try {
|
||||||
|
openCloseTableOutputFormat(iter);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("Exception encountered", e);
|
||||||
|
fail("Failed on iteration " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue