Merging r1566359 through r1568420 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1568437 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
ba4b10354c
|
@ -312,6 +312,11 @@ Release 2.4.0 - UNRELEASED
|
||||||
HADOOP-10295. Allow distcp to automatically identify the checksum type of
|
HADOOP-10295. Allow distcp to automatically identify the checksum type of
|
||||||
source files and use it for the target. (jing9 and Laurent Goujon)
|
source files and use it for the target. (jing9 and Laurent Goujon)
|
||||||
|
|
||||||
|
HADOOP-10333. Fix grammatical error in overview.html document.
|
||||||
|
(René Nyffenegger via suresh)
|
||||||
|
|
||||||
|
HADOOP-10343. Change info to debug log in LossyRetryInvocationHandler. (arpit)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
@ -328,15 +333,36 @@ Release 2.4.0 - UNRELEASED
|
||||||
HADOOP-10330. TestFrameDecoder fails if it cannot bind port 12345.
|
HADOOP-10330. TestFrameDecoder fails if it cannot bind port 12345.
|
||||||
(Arpit Agarwal)
|
(Arpit Agarwal)
|
||||||
|
|
||||||
Release 2.3.0 - UNRELEASED
|
HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. (bc Wong
|
||||||
|
via atm)
|
||||||
|
|
||||||
|
HADOOP-10338. Cannot get the FileStatus of the root inode from the new
|
||||||
|
Globber (cmccabe)
|
||||||
|
|
||||||
|
HADOOP-10249. LdapGroupsMapping should trim ldap password read from file.
|
||||||
|
(Dilli Armugam via suresh)
|
||||||
|
|
||||||
|
Release 2.3.1 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
||||||
HADOOP-8545. Filesystem Implementation for OpenStack Swift
|
NEW FEATURES
|
||||||
(Dmitry Mezhensky, David Dobbins, Stevel via stevel)
|
|
||||||
|
IMPROVEMENTS
|
||||||
|
|
||||||
|
OPTIMIZATIONS
|
||||||
|
|
||||||
|
BUG FIXES
|
||||||
|
|
||||||
|
Release 2.3.0 - 2014-02-18
|
||||||
|
|
||||||
|
INCOMPATIBLE CHANGES
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
|
|
||||||
|
HADOOP-8545. Filesystem Implementation for OpenStack Swift
|
||||||
|
(Dmitry Mezhensky, David Dobbins, Stevel via stevel)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
HADOOP-10046. Print a log message when SSL is enabled.
|
HADOOP-10046. Print a log message when SSL is enabled.
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -114,7 +114,8 @@ private String schemeFromPath(Path path) throws IOException {
|
||||||
if (fs != null) {
|
if (fs != null) {
|
||||||
scheme = fs.getUri().getScheme();
|
scheme = fs.getUri().getScheme();
|
||||||
} else {
|
} else {
|
||||||
scheme = fc.getDefaultFileSystem().getUri().getScheme();
|
scheme = fc.getFSofPath(fc.fixRelativePart(path)).
|
||||||
|
getUri().getScheme();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return scheme;
|
return scheme;
|
||||||
|
@ -126,7 +127,8 @@ private String authorityFromPath(Path path) throws IOException {
|
||||||
if (fs != null) {
|
if (fs != null) {
|
||||||
authority = fs.getUri().getAuthority();
|
authority = fs.getUri().getAuthority();
|
||||||
} else {
|
} else {
|
||||||
authority = fc.getDefaultFileSystem().getUri().getAuthority();
|
authority = fc.getFSofPath(fc.fixRelativePart(path)).
|
||||||
|
getUri().getAuthority();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return authority ;
|
return authority ;
|
||||||
|
@ -162,18 +164,26 @@ public FileStatus[] glob() throws IOException {
|
||||||
// Starting out at the root of the filesystem, we try to match
|
// Starting out at the root of the filesystem, we try to match
|
||||||
// filesystem entries against pattern components.
|
// filesystem entries against pattern components.
|
||||||
ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
|
ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
|
||||||
|
// To get the "real" FileStatus of root, we'd have to do an expensive
|
||||||
|
// RPC to the NameNode. So we create a placeholder FileStatus which has
|
||||||
|
// the correct path, but defaults for the rest of the information.
|
||||||
|
// Later, if it turns out we actually want the FileStatus of root, we'll
|
||||||
|
// replace the placeholder with a real FileStatus obtained from the
|
||||||
|
// NameNode.
|
||||||
|
FileStatus rootPlaceholder;
|
||||||
if (Path.WINDOWS && !components.isEmpty()
|
if (Path.WINDOWS && !components.isEmpty()
|
||||||
&& Path.isWindowsAbsolutePath(absPattern.toUri().getPath(), true)) {
|
&& Path.isWindowsAbsolutePath(absPattern.toUri().getPath(), true)) {
|
||||||
// On Windows the path could begin with a drive letter, e.g. /E:/foo.
|
// On Windows the path could begin with a drive letter, e.g. /E:/foo.
|
||||||
// We will skip matching the drive letter and start from listing the
|
// We will skip matching the drive letter and start from listing the
|
||||||
// root of the filesystem on that drive.
|
// root of the filesystem on that drive.
|
||||||
String driveLetter = components.remove(0);
|
String driveLetter = components.remove(0);
|
||||||
candidates.add(new FileStatus(0, true, 0, 0, 0, new Path(scheme,
|
rootPlaceholder = new FileStatus(0, true, 0, 0, 0, new Path(scheme,
|
||||||
authority, Path.SEPARATOR + driveLetter + Path.SEPARATOR)));
|
authority, Path.SEPARATOR + driveLetter + Path.SEPARATOR));
|
||||||
} else {
|
} else {
|
||||||
candidates.add(new FileStatus(0, true, 0, 0, 0,
|
rootPlaceholder = new FileStatus(0, true, 0, 0, 0,
|
||||||
new Path(scheme, authority, Path.SEPARATOR)));
|
new Path(scheme, authority, Path.SEPARATOR));
|
||||||
}
|
}
|
||||||
|
candidates.add(rootPlaceholder);
|
||||||
|
|
||||||
for (int componentIdx = 0; componentIdx < components.size();
|
for (int componentIdx = 0; componentIdx < components.size();
|
||||||
componentIdx++) {
|
componentIdx++) {
|
||||||
|
@ -245,6 +255,12 @@ public FileStatus[] glob() throws IOException {
|
||||||
candidates = newCandidates;
|
candidates = newCandidates;
|
||||||
}
|
}
|
||||||
for (FileStatus status : candidates) {
|
for (FileStatus status : candidates) {
|
||||||
|
// Use object equality to see if this status is the root placeholder.
|
||||||
|
// See the explanation for rootPlaceholder above for more information.
|
||||||
|
if (status == rootPlaceholder) {
|
||||||
|
status = getFileStatus(rootPlaceholder.getPath());
|
||||||
|
if (status == null) continue;
|
||||||
|
}
|
||||||
// HADOOP-3497 semantics: the user-defined filter is applied at the
|
// HADOOP-3497 semantics: the user-defined filter is applied at the
|
||||||
// end, once the full path is built up.
|
// end, once the full path is built up.
|
||||||
if (filter.accept(status.getPath())) {
|
if (filter.accept(status.getPath())) {
|
||||||
|
|
|
@ -443,6 +443,12 @@ public long getDefaultBlockSize() {
|
||||||
return getConf().getLong("fs.s3.block.size", 64 * 1024 * 1024);
|
return getConf().getLong("fs.s3.block.size", 64 * 1024 * 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCanonicalServiceName() {
|
||||||
|
// Does not support Token
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// diagnostic methods
|
// diagnostic methods
|
||||||
|
|
||||||
void dump() throws IOException {
|
void dump() throws IOException {
|
||||||
|
|
|
@ -733,4 +733,10 @@ public void setWorkingDirectory(Path newDir) {
|
||||||
public Path getWorkingDirectory() {
|
public Path getWorkingDirectory() {
|
||||||
return workingDir;
|
return workingDir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCanonicalServiceName() {
|
||||||
|
// Does not support Token
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,11 +51,15 @@ protected Object invokeMethod(Method method, Object[] args) throws Throwable {
|
||||||
int retryCount = RetryCount.get();
|
int retryCount = RetryCount.get();
|
||||||
if (retryCount < this.numToDrop) {
|
if (retryCount < this.numToDrop) {
|
||||||
RetryCount.set(++retryCount);
|
RetryCount.set(++retryCount);
|
||||||
LOG.info("Drop the response. Current retryCount == " + retryCount);
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Drop the response. Current retryCount == " + retryCount);
|
||||||
|
}
|
||||||
throw new RetriableException("Fake Exception");
|
throw new RetriableException("Fake Exception");
|
||||||
} else {
|
} else {
|
||||||
LOG.info("retryCount == " + retryCount
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("retryCount == " + retryCount
|
||||||
+ ". It's time to normally process the response");
|
+ ". It's time to normally process the response");
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -220,7 +220,7 @@ final public static void setPingInterval(Configuration conf, int pingInterval) {
|
||||||
* @param conf Configuration
|
* @param conf Configuration
|
||||||
* @return the ping interval
|
* @return the ping interval
|
||||||
*/
|
*/
|
||||||
final static int getPingInterval(Configuration conf) {
|
final public static int getPingInterval(Configuration conf) {
|
||||||
return conf.getInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY,
|
return conf.getInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY,
|
||||||
CommonConfigurationKeys.IPC_PING_INTERVAL_DEFAULT);
|
CommonConfigurationKeys.IPC_PING_INTERVAL_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,6 +66,7 @@
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configuration.IntegerRanges;
|
import org.apache.hadoop.conf.Configuration.IntegerRanges;
|
||||||
|
@ -454,9 +455,10 @@ public void refreshServiceAcl(Configuration conf, PolicyProvider provider) {
|
||||||
* Refresh the service authorization ACL for the service handled by this server
|
* Refresh the service authorization ACL for the service handled by this server
|
||||||
* using the specified Configuration.
|
* using the specified Configuration.
|
||||||
*/
|
*/
|
||||||
public void refreshServiceAclWithConfigration(Configuration conf,
|
@Private
|
||||||
|
public void refreshServiceAclWithLoadedConfiguration(Configuration conf,
|
||||||
PolicyProvider provider) {
|
PolicyProvider provider) {
|
||||||
serviceAuthorizationManager.refreshWithConfiguration(conf, provider);
|
serviceAuthorizationManager.refreshWithLoadedConfiguration(conf, provider);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Returns a handle to the serviceAuthorizationManager (required in tests)
|
* Returns a handle to the serviceAuthorizationManager (required in tests)
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.util.NativeCodeLoader;
|
import org.apache.hadoop.util.NativeCodeLoader;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.util.concurrent.Uninterruptibles;
|
import com.google.common.util.concurrent.Uninterruptibles;
|
||||||
|
|
||||||
|
@ -48,7 +49,7 @@
|
||||||
* See {@link DomainSocket} for more information about UNIX domain sockets.
|
* See {@link DomainSocket} for more information about UNIX domain sockets.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.LimitedPrivate("HDFS")
|
@InterfaceAudience.LimitedPrivate("HDFS")
|
||||||
public final class DomainSocketWatcher extends Thread implements Closeable {
|
public final class DomainSocketWatcher implements Closeable {
|
||||||
static {
|
static {
|
||||||
if (SystemUtils.IS_OS_WINDOWS) {
|
if (SystemUtils.IS_OS_WINDOWS) {
|
||||||
loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
|
loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
|
||||||
|
@ -281,7 +282,7 @@ public void add(DomainSocket sock, Handler handler) {
|
||||||
try {
|
try {
|
||||||
processedCond.await();
|
processedCond.await();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
this.interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
if (!toAdd.contains(entry)) {
|
if (!toAdd.contains(entry)) {
|
||||||
break;
|
break;
|
||||||
|
@ -308,7 +309,7 @@ public void remove(DomainSocket sock) {
|
||||||
try {
|
try {
|
||||||
processedCond.await();
|
processedCond.await();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
this.interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
if (!toRemove.containsKey(sock.fd)) {
|
if (!toRemove.containsKey(sock.fd)) {
|
||||||
break;
|
break;
|
||||||
|
@ -381,7 +382,8 @@ private void sendCallback(String caller, TreeMap<Integer, Entry> entries,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Thread watcherThread = new Thread(new Runnable() {
|
@VisibleForTesting
|
||||||
|
final Thread watcherThread = new Thread(new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
LOG.info(this + ": starting with interruptCheckPeriodMs = " +
|
LOG.info(this + ": starting with interruptCheckPeriodMs = " +
|
||||||
|
@ -443,6 +445,7 @@ public void run() {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error(toString() + " terminating on IOException", e);
|
LOG.error(toString() + " terminating on IOException", e);
|
||||||
} finally {
|
} finally {
|
||||||
|
kick(); // allow the handler for notificationSockets[0] to read a byte
|
||||||
for (Entry entry : entries.values()) {
|
for (Entry entry : entries.values()) {
|
||||||
sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
|
sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
import org.apache.hadoop.HadoopIllegalArgumentException;
|
import org.apache.hadoop.HadoopIllegalArgumentException;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
|
@ -240,4 +241,18 @@ public static synchronized Groups getUserToGroupsMappingService(
|
||||||
}
|
}
|
||||||
return GROUPS;
|
return GROUPS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create new groups used to map user-to-groups with loaded configuration.
|
||||||
|
* @param conf
|
||||||
|
* @return the groups being used to map user-to-groups.
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
public static synchronized Groups
|
||||||
|
getUserToGroupsMappingServiceWithLoadedConfiguration(
|
||||||
|
Configuration conf) {
|
||||||
|
|
||||||
|
GROUPS = new Groups(conf);
|
||||||
|
return GROUPS;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -356,7 +356,7 @@ String extractPassword(String pwFile) {
|
||||||
c = reader.read();
|
c = reader.read();
|
||||||
}
|
}
|
||||||
reader.close();
|
reader.close();
|
||||||
return password.toString();
|
return password.toString().trim();
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
throw new RuntimeException("Could not read password file: " + pwFile, ioe);
|
throw new RuntimeException("Could not read password file: " + pwFile, ioe);
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
|
@ -122,10 +123,11 @@ public synchronized void refresh(Configuration conf,
|
||||||
// Make a copy of the original config, and load the policy file
|
// Make a copy of the original config, and load the policy file
|
||||||
Configuration policyConf = new Configuration(conf);
|
Configuration policyConf = new Configuration(conf);
|
||||||
policyConf.addResource(policyFile);
|
policyConf.addResource(policyFile);
|
||||||
refreshWithConfiguration(policyConf, provider);
|
refreshWithLoadedConfiguration(policyConf, provider);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void refreshWithConfiguration(Configuration conf,
|
@Private
|
||||||
|
public synchronized void refreshWithLoadedConfiguration(Configuration conf,
|
||||||
PolicyProvider provider) {
|
PolicyProvider provider) {
|
||||||
final Map<Class<?>, AccessControlList> newAcls =
|
final Map<Class<?>, AccessControlList> newAcls =
|
||||||
new IdentityHashMap<Class<?>, AccessControlList>();
|
new IdentityHashMap<Class<?>, AccessControlList>();
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.util;
|
||||||
|
|
||||||
|
import java.util.concurrent.locks.Condition;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an object that you can wait for.
|
||||||
|
*/
|
||||||
|
public class Waitable<T> {
|
||||||
|
private T val;
|
||||||
|
private final Condition cond;
|
||||||
|
|
||||||
|
public Waitable(Condition cond) {
|
||||||
|
this.val = null;
|
||||||
|
this.cond = cond;
|
||||||
|
}
|
||||||
|
|
||||||
|
public T await() throws InterruptedException {
|
||||||
|
while (this.val == null) {
|
||||||
|
this.cond.await();
|
||||||
|
}
|
||||||
|
return this.val;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void provide(T val) {
|
||||||
|
this.val = val;
|
||||||
|
this.cond.signalAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasVal() {
|
||||||
|
return this.val != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public T getVal() {
|
||||||
|
return this.val;
|
||||||
|
}
|
||||||
|
}
|
|
@ -57,7 +57,7 @@ <h3>Platforms</h3>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>
|
<li>
|
||||||
Hadoop was been demonstrated on GNU/Linux clusters with 2000 nodes.
|
Hadoop has been demonstrated on GNU/Linux clusters with more than 4000 nodes.
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
Windows is also a supported platform.
|
Windows is also a supported platform.
|
||||||
|
|
|
@ -55,4 +55,9 @@ public void testBlockSize() throws Exception {
|
||||||
fs.getFileStatus(file).getBlockSize());
|
fs.getFileStatus(file).getBlockSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCanonicalName() throws Exception {
|
||||||
|
assertNull("s3 doesn't support security token and shouldn't have canonical name",
|
||||||
|
fs.getCanonicalServiceName());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,11 @@ protected void tearDown() throws Exception {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCanonicalName() throws Exception {
|
||||||
|
assertNull("s3n doesn't support security token and shouldn't have canonical name",
|
||||||
|
fs.getCanonicalServiceName());
|
||||||
|
}
|
||||||
|
|
||||||
public void testListStatusForRoot() throws Exception {
|
public void testListStatusForRoot() throws Exception {
|
||||||
FileStatus[] paths = fs.listStatus(path("/"));
|
FileStatus[] paths = fs.listStatus(path("/"));
|
||||||
assertEquals("Root directory is not empty; ", 0, paths.length);
|
assertEquals("Root directory is not empty; ", 0, paths.length);
|
||||||
|
|
|
@ -73,9 +73,10 @@ public boolean handle(DomainSocket sock) {
|
||||||
*/
|
*/
|
||||||
@Test(timeout=60000)
|
@Test(timeout=60000)
|
||||||
public void testInterruption() throws Exception {
|
public void testInterruption() throws Exception {
|
||||||
DomainSocketWatcher watcher = new DomainSocketWatcher(10);
|
final DomainSocketWatcher watcher = new DomainSocketWatcher(10);
|
||||||
watcher.interrupt();
|
watcher.watcherThread.interrupt();
|
||||||
Uninterruptibles.joinUninterruptibly(watcher);
|
Uninterruptibles.joinUninterruptibly(watcher.watcherThread);
|
||||||
|
watcher.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=300000)
|
@Test(timeout=300000)
|
||||||
|
|
|
@ -545,7 +545,8 @@ public READLINK3Response readlink(XDR xdr, SecurityHandler securityHandler,
|
||||||
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
|
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
|
||||||
}
|
}
|
||||||
if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) {
|
if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) {
|
||||||
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, null);
|
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr,
|
||||||
|
new byte[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new READLINK3Response(Nfs3Status.NFS3_OK, postOpAttr,
|
return new READLINK3Response(Nfs3Status.NFS3_OK, postOpAttr,
|
||||||
|
@ -1828,7 +1829,8 @@ public COMMIT3Response commit(XDR xdr, Channel channel, int xid,
|
||||||
} catch (IOException e1) {
|
} catch (IOException e1) {
|
||||||
LOG.info("Can't get postOpAttr for fileId: " + handle.getFileId());
|
LOG.info("Can't get postOpAttr for fileId: " + handle.getFileId());
|
||||||
}
|
}
|
||||||
WccData fileWcc = new WccData(Nfs3Utils.getWccAttr(preOpAttr), postOpAttr);
|
WccData fileWcc = new WccData(preOpAttr == null ? null
|
||||||
|
: Nfs3Utils.getWccAttr(preOpAttr), postOpAttr);
|
||||||
return new COMMIT3Response(Nfs3Status.NFS3ERR_IO, fileWcc,
|
return new COMMIT3Response(Nfs3Status.NFS3ERR_IO, fileWcc,
|
||||||
Nfs3Constant.WRITE_COMMIT_VERF);
|
Nfs3Constant.WRITE_COMMIT_VERF);
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,31 +120,6 @@ Trunk (Unreleased)
|
||||||
HDFS-5041. Add the time of last heartbeat to dead server Web UI (Shinichi
|
HDFS-5041. Add the time of last heartbeat to dead server Web UI (Shinichi
|
||||||
Yamashita via brandonli)
|
Yamashita via brandonli)
|
||||||
|
|
||||||
HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode.
|
|
||||||
(szetszwo)
|
|
||||||
|
|
||||||
HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction
|
|
||||||
and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature.
|
|
||||||
(jing9 via szetszwo)
|
|
||||||
|
|
||||||
HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota
|
|
||||||
with DirectoryWithQuotaFeature. (szetszwo)
|
|
||||||
|
|
||||||
HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo)
|
|
||||||
|
|
||||||
HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with
|
|
||||||
FileWithSnapshotFeature. (jing9 via szetszwo)
|
|
||||||
|
|
||||||
HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai
|
|
||||||
via jing9)
|
|
||||||
|
|
||||||
HDFS-5632. Flatten INodeDirectory hierarchy: Replace
|
|
||||||
INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature.
|
|
||||||
(jing9 via szetszwo)
|
|
||||||
|
|
||||||
HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a
|
|
||||||
FileDiff/DirectoryDiff. (jing9)
|
|
||||||
|
|
||||||
HDFS-5721. sharedEditsImage in Namenode#initializeSharedEdits() should be
|
HDFS-5721. sharedEditsImage in Namenode#initializeSharedEdits() should be
|
||||||
closed before method returns. (Ted Yu via junping_du)
|
closed before method returns. (Ted Yu via junping_du)
|
||||||
|
|
||||||
|
@ -275,8 +250,6 @@ Trunk (Unreleased)
|
||||||
HDFS-5719. FSImage#doRollback() should close prevState before return
|
HDFS-5719. FSImage#doRollback() should close prevState before return
|
||||||
(Ted Yu via brandonli)
|
(Ted Yu via brandonli)
|
||||||
|
|
||||||
HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9)
|
|
||||||
|
|
||||||
HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager
|
HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager
|
||||||
(Haohui Mai via brandonli)
|
(Haohui Mai via brandonli)
|
||||||
|
|
||||||
|
@ -286,6 +259,60 @@ Trunk (Unreleased)
|
||||||
HDFS-5794. Fix the inconsistency of layout version number of
|
HDFS-5794. Fix the inconsistency of layout version number of
|
||||||
ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9)
|
ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9)
|
||||||
|
|
||||||
|
BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS
|
||||||
|
|
||||||
|
HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5738. Serialize INode information in protobuf. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5772. Serialize under-construction file information in FSImage. (jing9)
|
||||||
|
|
||||||
|
HDFS-5783. Compute the digest before loading FSImage. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5785. Serialize symlink in protobuf. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5793. Optimize the serialization of PermissionStatus. (Haohui Mai via
|
||||||
|
jing9)
|
||||||
|
|
||||||
|
HDFS-5743. Use protobuf to serialize snapshot information. (jing9)
|
||||||
|
|
||||||
|
HDFS-5774. Serialize CachePool directives in protobuf. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5744. Serialize information for token managers in protobuf. (Haohui Mai
|
||||||
|
via jing9)
|
||||||
|
|
||||||
|
HDFS-5824. Add a Type field in Snapshot DiffEntry's protobuf definition.
|
||||||
|
(jing9)
|
||||||
|
|
||||||
|
HDFS-5808. Implement cancellation when saving FSImage. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5826. Update the stored edit logs to be consistent with the changes in
|
||||||
|
HDFS-5698 branch. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5797. Implement offline image viewer. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5771. Track progress when loading fsimage. (Haohui Mai via cnauroth)
|
||||||
|
|
||||||
|
HDFS-5871. Use PBHelper to serialize CacheDirectiveInfoExpirationProto.
|
||||||
|
(Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5884. LoadDelegator should use IOUtils.readFully() to read the magic
|
||||||
|
header. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5885. Add annotation for repeated fields in the protobuf definition.
|
||||||
|
(Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5906. Fixing findbugs and javadoc warnings in the HDFS-5698 branch.
|
||||||
|
(Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5911. The id of a CacheDirective instance does not get serialized in
|
||||||
|
the protobuf-fsimage. (Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5915. Refactor FSImageFormatProtobuf to simplify cross section reads.
|
||||||
|
(Haohui Mai via cnauroth)
|
||||||
|
|
||||||
|
HDFS-5847. Consolidate INodeReference into a separate section. (jing9)
|
||||||
|
|
||||||
Release 2.4.0 - UNRELEASED
|
Release 2.4.0 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -311,6 +338,44 @@ Release 2.4.0 - UNRELEASED
|
||||||
HDFS-4911. Reduce PeerCache timeout to be commensurate with
|
HDFS-4911. Reduce PeerCache timeout to be commensurate with
|
||||||
dfs.datanode.socket.reuse.keepalive (cmccabe)
|
dfs.datanode.socket.reuse.keepalive (cmccabe)
|
||||||
|
|
||||||
|
HDFS-4370. Fix typo Blanacer in DataNode. (Chu Tong via shv)
|
||||||
|
|
||||||
|
HDFS-5929. Add blockpool % usage to HDFS federated nn page.
|
||||||
|
(Siqi Li via suresh)
|
||||||
|
|
||||||
|
HDFS-5810. Unify mmap cache and short-circuit file descriptor cache
|
||||||
|
(cmccabe)
|
||||||
|
|
||||||
|
HDFS-5940. Minor cleanups to ShortCircuitReplica, FsDatasetCache, and
|
||||||
|
DomainSocketWatcher (cmccabe)
|
||||||
|
|
||||||
|
HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode.
|
||||||
|
(szetszwo)
|
||||||
|
|
||||||
|
HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction
|
||||||
|
and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature.
|
||||||
|
(jing9 via szetszwo)
|
||||||
|
|
||||||
|
HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota
|
||||||
|
with DirectoryWithQuotaFeature. (szetszwo)
|
||||||
|
|
||||||
|
HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo)
|
||||||
|
|
||||||
|
HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with
|
||||||
|
FileWithSnapshotFeature. (jing9 via szetszwo)
|
||||||
|
|
||||||
|
HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai
|
||||||
|
via jing9)
|
||||||
|
|
||||||
|
HDFS-5632. Flatten INodeDirectory hierarchy: Replace
|
||||||
|
INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature.
|
||||||
|
(jing9 via szetszwo)
|
||||||
|
|
||||||
|
HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a
|
||||||
|
FileDiff/DirectoryDiff. (jing9)
|
||||||
|
|
||||||
|
HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
||||||
|
@ -353,7 +418,52 @@ Release 2.4.0 - UNRELEASED
|
||||||
HDFS-5900. Cannot set cache pool limit of "unlimited" via CacheAdmin.
|
HDFS-5900. Cannot set cache pool limit of "unlimited" via CacheAdmin.
|
||||||
(wang)
|
(wang)
|
||||||
|
|
||||||
Release 2.3.0 - UNRELEASED
|
HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink()
|
||||||
|
(brandonli)
|
||||||
|
|
||||||
|
HDFS-4858. HDFS DataNode to NameNode RPC should timeout.
|
||||||
|
(Henry Wang via shv)
|
||||||
|
|
||||||
|
HDFS-5879. Some TestHftpFileSystem tests do not close streams.
|
||||||
|
(Gera Shegalov via suresh)
|
||||||
|
|
||||||
|
HDFS-5938. Make BlockReaderFactory#BlockReaderPeer a static class to avoid
|
||||||
|
a findbugs warning. (cmccabe)
|
||||||
|
|
||||||
|
HDFS-5891. webhdfs should not try connecting the DN during redirection
|
||||||
|
(Haohui Mai via brandonli)
|
||||||
|
|
||||||
|
HDFS-5904. TestFileStatus fails intermittently. (Mit Desai via kihwal)
|
||||||
|
|
||||||
|
HDFS-5941. add dfs.namenode.secondary.https-address and
|
||||||
|
dfs.namenode.secondary.https-address in hdfs-default.xml.
|
||||||
|
(Haohui Mai via cnauroth)
|
||||||
|
|
||||||
|
HDFS-5913. Nfs3Utils#getWccAttr() should check attr parameter against null
|
||||||
|
(brandonli)
|
||||||
|
|
||||||
|
HDFS-5934. New Namenode UI back button doesn't work as expected
|
||||||
|
(Travis Thompson via brandonli)
|
||||||
|
|
||||||
|
HDFS-5901. NameNode new UI doesn't support IE8 and IE9 on windows 7
|
||||||
|
(Vinayakumar B via brandonli)
|
||||||
|
|
||||||
|
HDFS-5943. 'dfs.namenode.https-address' property is not loaded from
|
||||||
|
configuration in federation setup. (suresh)
|
||||||
|
|
||||||
|
Release 2.3.1 - UNRELEASED
|
||||||
|
|
||||||
|
INCOMPATIBLE CHANGES
|
||||||
|
|
||||||
|
NEW FEATURES
|
||||||
|
|
||||||
|
IMPROVEMENTS
|
||||||
|
|
||||||
|
OPTIMIZATIONS
|
||||||
|
|
||||||
|
BUG FIXES
|
||||||
|
|
||||||
|
Release 2.3.0 - 2014-02-18
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
||||||
|
@ -891,6 +1001,12 @@ Release 2.3.0 - UNRELEASED
|
||||||
HDFS-5873. dfs.http.policy should have higher precedence over dfs.https.enable.
|
HDFS-5873. dfs.http.policy should have higher precedence over dfs.https.enable.
|
||||||
(Haohui Mai via jing9)
|
(Haohui Mai via jing9)
|
||||||
|
|
||||||
|
HDFS-5837. dfs.namenode.replication.considerLoad should consider
|
||||||
|
decommissioned nodes. (Tao Luo via shv)
|
||||||
|
|
||||||
|
HDFS-5921. Cannot browse file system via NN web UI if any directory has
|
||||||
|
the sticky bit set. (atm)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||||
|
|
||||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||||
|
|
|
@ -8,6 +8,9 @@
|
||||||
<Match>
|
<Match>
|
||||||
<Package name="org.apache.hadoop.hdfs.server.namenode.ha.proto" />
|
<Package name="org.apache.hadoop.hdfs.server.namenode.ha.proto" />
|
||||||
</Match>
|
</Match>
|
||||||
|
<Match>
|
||||||
|
<Class name="~org.apache.hadoop.hdfs.server.namenode.FsImageProto.*" />
|
||||||
|
</Match>
|
||||||
<Match>
|
<Match>
|
||||||
<Package name="org.apache.hadoop.hdfs.qjournal.protocol" />
|
<Package name="org.apache.hadoop.hdfs.qjournal.protocol" />
|
||||||
</Match>
|
</Match>
|
||||||
|
|
|
@ -458,6 +458,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<includes>
|
<includes>
|
||||||
<include>ClientDatanodeProtocol.proto</include>
|
<include>ClientDatanodeProtocol.proto</include>
|
||||||
<include>DatanodeProtocol.proto</include>
|
<include>DatanodeProtocol.proto</include>
|
||||||
|
<include>fsimage.proto</include>
|
||||||
</includes>
|
</includes>
|
||||||
</source>
|
</source>
|
||||||
<output>${project.build.directory}/generated-sources/java</output>
|
<output>${project.build.directory}/generated-sources/java</output>
|
||||||
|
|
|
@ -139,7 +139,7 @@ elif [ "$COMMAND" = "balancer" ] ; then
|
||||||
elif [ "$COMMAND" = "jmxget" ] ; then
|
elif [ "$COMMAND" = "jmxget" ] ; then
|
||||||
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
|
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
|
||||||
elif [ "$COMMAND" = "oiv" ] ; then
|
elif [ "$COMMAND" = "oiv" ] ; then
|
||||||
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
|
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
|
||||||
elif [ "$COMMAND" = "oev" ] ; then
|
elif [ "$COMMAND" = "oev" ] ; then
|
||||||
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
|
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
|
||||||
elif [ "$COMMAND" = "fetchdt" ] ; then
|
elif [ "$COMMAND" = "fetchdt" ] ; then
|
||||||
|
|
|
@ -23,7 +23,6 @@
|
||||||
import org.apache.hadoop.fs.ByteBufferReadable;
|
import org.apache.hadoop.fs.ByteBufferReadable;
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,6 +96,5 @@ public interface BlockReader extends ByteBufferReadable {
|
||||||
* @return The ClientMmap object, or null if mmap is not
|
* @return The ClientMmap object, or null if mmap is not
|
||||||
* supported.
|
* supported.
|
||||||
*/
|
*/
|
||||||
ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
ClientMmap getClientMmap(EnumSet<ReadOption> opts);
|
||||||
ClientMmapManager mmapManager);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,217 +24,748 @@
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache.ShortCircuitReplicaCreator;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplicaInfo;
|
||||||
|
import org.apache.hadoop.hdfs.net.DomainPeer;
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
|
||||||
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
|
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
|
||||||
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
|
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
|
||||||
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
|
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
|
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
import org.apache.hadoop.net.unix.DomainSocket;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
import org.apache.hadoop.security.AccessControlException;
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class to create BlockReader implementations.
|
* Utility class to create BlockReader implementations.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class BlockReaderFactory {
|
public class BlockReaderFactory implements ShortCircuitReplicaCreator {
|
||||||
/**
|
static final Log LOG = LogFactory.getLog(BlockReaderFactory.class);
|
||||||
* Create a new BlockReader specifically to satisfy a read.
|
|
||||||
* This method also sends the OP_READ_BLOCK request.
|
|
||||||
*
|
|
||||||
* @param conf the DFSClient configuration
|
|
||||||
* @param file File location
|
|
||||||
* @param block The block object
|
|
||||||
* @param blockToken The block token for security
|
|
||||||
* @param startOffset The read offset, relative to block head
|
|
||||||
* @param len The number of bytes to read, or -1 to read as many as
|
|
||||||
* possible.
|
|
||||||
* @param bufferSize The IO buffer size (not the client buffer size)
|
|
||||||
* Ignored except on the legacy BlockReader.
|
|
||||||
* @param verifyChecksum Whether to verify checksum
|
|
||||||
* @param clientName Client name. Used for log messages.
|
|
||||||
* @param peer The peer
|
|
||||||
* @param datanodeID The datanode that the Peer is connected to
|
|
||||||
* @param domainSocketFactory The DomainSocketFactory to notify if the Peer
|
|
||||||
* is a DomainPeer which turns out to be faulty.
|
|
||||||
* If null, no factory will be notified in this
|
|
||||||
* case.
|
|
||||||
* @param allowShortCircuitLocalReads True if short-circuit local reads
|
|
||||||
* should be allowed.
|
|
||||||
* @return New BlockReader instance
|
|
||||||
*/
|
|
||||||
public static BlockReader newBlockReader(DFSClient.Conf conf,
|
|
||||||
String file,
|
|
||||||
ExtendedBlock block,
|
|
||||||
Token<BlockTokenIdentifier> blockToken,
|
|
||||||
long startOffset, long len,
|
|
||||||
boolean verifyChecksum,
|
|
||||||
String clientName,
|
|
||||||
Peer peer,
|
|
||||||
DatanodeID datanodeID,
|
|
||||||
DomainSocketFactory domSockFactory,
|
|
||||||
PeerCache peerCache,
|
|
||||||
FileInputStreamCache fisCache,
|
|
||||||
boolean allowShortCircuitLocalReads,
|
|
||||||
CachingStrategy cachingStrategy)
|
|
||||||
throws IOException {
|
|
||||||
peer.setReadTimeout(conf.socketTimeout);
|
|
||||||
peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT);
|
|
||||||
|
|
||||||
if (peer.getDomainSocket() != null) {
|
@VisibleForTesting
|
||||||
if (allowShortCircuitLocalReads && !conf.useLegacyBlockReaderLocal) {
|
static ShortCircuitReplicaCreator
|
||||||
// If this is a domain socket, and short-circuit local reads are
|
createShortCircuitReplicaInfoCallback = null;
|
||||||
// enabled, try to set up a BlockReaderLocal.
|
|
||||||
BlockReader reader = newShortCircuitBlockReader(conf, file,
|
private final DFSClient.Conf conf;
|
||||||
block, blockToken, startOffset, len, peer, datanodeID,
|
|
||||||
domSockFactory, verifyChecksum, fisCache, cachingStrategy);
|
/**
|
||||||
|
* The file name, for logging and debugging purposes.
|
||||||
|
*/
|
||||||
|
private String fileName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The block ID and block pool ID to use.
|
||||||
|
*/
|
||||||
|
private ExtendedBlock block;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The block token to use for security purposes.
|
||||||
|
*/
|
||||||
|
private Token<BlockTokenIdentifier> token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The offset within the block to start reading at.
|
||||||
|
*/
|
||||||
|
private long startOffset;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If false, we won't try to verify the block checksum.
|
||||||
|
*/
|
||||||
|
private boolean verifyChecksum;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The name of this client.
|
||||||
|
*/
|
||||||
|
private String clientName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The DataNode we're talking to.
|
||||||
|
*/
|
||||||
|
private DatanodeInfo datanode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If false, we won't try short-circuit local reads.
|
||||||
|
*/
|
||||||
|
private boolean allowShortCircuitLocalReads;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The ClientContext to use for things like the PeerCache.
|
||||||
|
*/
|
||||||
|
private ClientContext clientContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of bytes to read. -1 indicates no limit.
|
||||||
|
*/
|
||||||
|
private long length = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caching strategy to use when reading the block.
|
||||||
|
*/
|
||||||
|
private CachingStrategy cachingStrategy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Socket address to use to connect to peer.
|
||||||
|
*/
|
||||||
|
private InetSocketAddress inetSocketAddress;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remote peer factory to use to create a peer, if needed.
|
||||||
|
*/
|
||||||
|
private RemotePeerFactory remotePeerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* UserGroupInformation to use for legacy block reader local objects, if needed.
|
||||||
|
*/
|
||||||
|
private UserGroupInformation userGroupInformation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration to use for legacy block reader local objects, if needed.
|
||||||
|
*/
|
||||||
|
private Configuration configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information about the domain socket path we should use to connect to the
|
||||||
|
* local peer-- or null if we haven't examined the local domain socket.
|
||||||
|
*/
|
||||||
|
private DomainSocketFactory.PathInfo pathInfo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The remaining number of times that we'll try to pull a socket out of the
|
||||||
|
* cache.
|
||||||
|
*/
|
||||||
|
private int remainingCacheTries;
|
||||||
|
|
||||||
|
public BlockReaderFactory(DFSClient.Conf conf) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.remainingCacheTries = conf.nCachedConnRetry;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setFileName(String fileName) {
|
||||||
|
this.fileName = fileName;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setBlock(ExtendedBlock block) {
|
||||||
|
this.block = block;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) {
|
||||||
|
this.token = token;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setStartOffset(long startOffset) {
|
||||||
|
this.startOffset = startOffset;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) {
|
||||||
|
this.verifyChecksum = verifyChecksum;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setClientName(String clientName) {
|
||||||
|
this.clientName = clientName;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) {
|
||||||
|
this.datanode = datanode;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setAllowShortCircuitLocalReads(
|
||||||
|
boolean allowShortCircuitLocalReads) {
|
||||||
|
this.allowShortCircuitLocalReads = allowShortCircuitLocalReads;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setClientCacheContext(
|
||||||
|
ClientContext clientContext) {
|
||||||
|
this.clientContext = clientContext;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setLength(long length) {
|
||||||
|
this.length = length;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setCachingStrategy(
|
||||||
|
CachingStrategy cachingStrategy) {
|
||||||
|
this.cachingStrategy = cachingStrategy;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setInetSocketAddress (
|
||||||
|
InetSocketAddress inetSocketAddress) {
|
||||||
|
this.inetSocketAddress = inetSocketAddress;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setUserGroupInformation(
|
||||||
|
UserGroupInformation userGroupInformation) {
|
||||||
|
this.userGroupInformation = userGroupInformation;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setRemotePeerFactory(
|
||||||
|
RemotePeerFactory remotePeerFactory) {
|
||||||
|
this.remotePeerFactory = remotePeerFactory;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockReaderFactory setConfiguration(
|
||||||
|
Configuration configuration) {
|
||||||
|
this.configuration = configuration;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a BlockReader with the given options.
|
||||||
|
*
|
||||||
|
* This function will do the best it can to create a block reader that meets
|
||||||
|
* all of our requirements. We prefer short-circuit block readers
|
||||||
|
* (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the
|
||||||
|
* former avoid the overhead of socket communication. If short-circuit is
|
||||||
|
* unavailable, our next fallback is data transfer over UNIX domain sockets,
|
||||||
|
* if dfs.client.domain.socket.data.traffic has been enabled. If that doesn't
|
||||||
|
* work, we will try to create a remote block reader that operates over TCP
|
||||||
|
* sockets.
|
||||||
|
*
|
||||||
|
* There are a few caches that are important here.
|
||||||
|
*
|
||||||
|
* The ShortCircuitCache stores file descriptor objects which have been passed
|
||||||
|
* from the DataNode.
|
||||||
|
*
|
||||||
|
* The DomainSocketFactory stores information about UNIX domain socket paths
|
||||||
|
* that we not been able to use in the past, so that we don't waste time
|
||||||
|
* retrying them over and over. (Like all the caches, it does have a timeout,
|
||||||
|
* though.)
|
||||||
|
*
|
||||||
|
* The PeerCache stores peers that we have used in the past. If we can reuse
|
||||||
|
* one of these peers, we avoid the overhead of re-opening a socket. However,
|
||||||
|
* if the socket has been timed out on the remote end, our attempt to reuse
|
||||||
|
* the socket may end with an IOException. For that reason, we limit our
|
||||||
|
* attempts at socket reuse to dfs.client.cached.conn.retry times. After
|
||||||
|
* that, we create new sockets. This avoids the problem where a thread tries
|
||||||
|
* to talk to a peer that it hasn't talked to in a while, and has to clean out
|
||||||
|
* every entry in a socket cache full of stale entries.
|
||||||
|
*
|
||||||
|
* @return The new BlockReader. We will not return null.
|
||||||
|
*
|
||||||
|
* @throws InvalidToken
|
||||||
|
* If the block token was invalid.
|
||||||
|
* InvalidEncryptionKeyException
|
||||||
|
* If the encryption key was invalid.
|
||||||
|
* Other IOException
|
||||||
|
* If there was another problem.
|
||||||
|
*/
|
||||||
|
public BlockReader build() throws IOException {
|
||||||
|
BlockReader reader = null;
|
||||||
|
|
||||||
|
Preconditions.checkNotNull(configuration);
|
||||||
|
if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) {
|
||||||
|
if (clientContext.getUseLegacyBlockReaderLocal()) {
|
||||||
|
reader = getLegacyBlockReaderLocal();
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
// One we've constructed the short-circuit block reader, we don't
|
if (LOG.isTraceEnabled()) {
|
||||||
// need the socket any more. So let's return it to the cache.
|
LOG.trace(this + ": returning new legacy block reader local.");
|
||||||
if (peerCache != null) {
|
}
|
||||||
peerCache.put(datanodeID, peer);
|
return reader;
|
||||||
} else {
|
}
|
||||||
IOUtils.cleanup(null, peer);
|
} else {
|
||||||
|
reader = getBlockReaderLocal();
|
||||||
|
if (reader != null) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": returning new block reader local.");
|
||||||
}
|
}
|
||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If this is a domain socket and we couldn't (or didn't want to) set
|
}
|
||||||
// up a BlockReaderLocal, check that we are allowed to pass data traffic
|
if (conf.domainSocketDataTraffic) {
|
||||||
// over the socket before proceeding.
|
reader = getRemoteBlockReaderFromDomain();
|
||||||
if (!conf.domainSocketDataTraffic) {
|
if (reader != null) {
|
||||||
throw new IOException("Because we can't do short-circuit access, " +
|
if (LOG.isTraceEnabled()) {
|
||||||
"and data traffic over domain sockets is disabled, " +
|
LOG.trace(this + ": returning new remote block reader using " +
|
||||||
"we cannot use this socket to talk to " + datanodeID);
|
"UNIX domain socket on " + pathInfo.getPath());
|
||||||
|
}
|
||||||
|
return reader;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting,
|
||||||
if (conf.useLegacyBlockReader) {
|
"TCP reads were disabled for testing, but we failed to " +
|
||||||
@SuppressWarnings("deprecation")
|
"do a non-TCP read.");
|
||||||
RemoteBlockReader reader = RemoteBlockReader.newBlockReader(file,
|
return getRemoteBlockReaderFromTcp();
|
||||||
block, blockToken, startOffset, len, conf.ioBufferSize,
|
|
||||||
verifyChecksum, clientName, peer, datanodeID, peerCache,
|
|
||||||
cachingStrategy);
|
|
||||||
return reader;
|
|
||||||
} else {
|
|
||||||
return RemoteBlockReader2.newBlockReader(
|
|
||||||
file, block, blockToken, startOffset, len,
|
|
||||||
verifyChecksum, clientName, peer, datanodeID, peerCache,
|
|
||||||
cachingStrategy);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new short-circuit BlockReader.
|
* Get {@link BlockReaderLocalLegacy} for short circuited local reads.
|
||||||
*
|
* This block reader implements the path-based style of local reads
|
||||||
* Here, we ask the DataNode to pass us file descriptors over our
|
* first introduced in HDFS-2246.
|
||||||
* DomainSocket. If the DataNode declines to do so, we'll return null here;
|
|
||||||
* otherwise, we'll return the BlockReaderLocal. If the DataNode declines,
|
|
||||||
* this function will inform the DomainSocketFactory that short-circuit local
|
|
||||||
* reads are disabled for this DataNode, so that we don't ask again.
|
|
||||||
*
|
|
||||||
* @param conf the configuration.
|
|
||||||
* @param file the file name. Used in log messages.
|
|
||||||
* @param block The block object.
|
|
||||||
* @param blockToken The block token for security.
|
|
||||||
* @param startOffset The read offset, relative to block head.
|
|
||||||
* @param len The number of bytes to read, or -1 to read
|
|
||||||
* as many as possible.
|
|
||||||
* @param peer The peer to use.
|
|
||||||
* @param datanodeID The datanode that the Peer is connected to.
|
|
||||||
* @param domSockFactory The DomainSocketFactory to notify if the Peer
|
|
||||||
* is a DomainPeer which turns out to be faulty.
|
|
||||||
* If null, no factory will be notified in this
|
|
||||||
* case.
|
|
||||||
* @param verifyChecksum True if we should verify the checksums.
|
|
||||||
* Note: even if this is true, when
|
|
||||||
* DFS_CLIENT_READ_CHECKSUM_SKIP_CHECKSUM_KEY is
|
|
||||||
* set or the block is mlocked, we will skip
|
|
||||||
* checksums.
|
|
||||||
*
|
|
||||||
* @return The BlockReaderLocal, or null if the
|
|
||||||
* DataNode declined to provide short-circuit
|
|
||||||
* access.
|
|
||||||
* @throws IOException If there was a communication error.
|
|
||||||
*/
|
*/
|
||||||
private static BlockReaderLocal newShortCircuitBlockReader(
|
private BlockReader getLegacyBlockReaderLocal() throws IOException {
|
||||||
DFSClient.Conf conf, String file, ExtendedBlock block,
|
if (LOG.isTraceEnabled()) {
|
||||||
Token<BlockTokenIdentifier> blockToken, long startOffset,
|
LOG.trace(this + ": trying to construct BlockReaderLocalLegacy");
|
||||||
long len, Peer peer, DatanodeID datanodeID,
|
}
|
||||||
DomainSocketFactory domSockFactory, boolean verifyChecksum,
|
if (!DFSClient.isLocalAddress(inetSocketAddress)) {
|
||||||
FileInputStreamCache fisCache,
|
if (LOG.isTraceEnabled()) {
|
||||||
CachingStrategy cachingStrategy) throws IOException {
|
LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
|
||||||
|
"the address " + inetSocketAddress + " is not local");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (clientContext.getDisableLegacyBlockReaderLocal()) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
|
||||||
|
"disableLegacyBlockReaderLocal is set.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
IOException ioe = null;
|
||||||
|
try {
|
||||||
|
return BlockReaderLocalLegacy.newBlockReader(conf,
|
||||||
|
userGroupInformation, configuration, fileName, block, token,
|
||||||
|
datanode, startOffset, length);
|
||||||
|
} catch (RemoteException remoteException) {
|
||||||
|
ioe = remoteException.unwrapRemoteException(
|
||||||
|
InvalidToken.class, AccessControlException.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
ioe = e;
|
||||||
|
}
|
||||||
|
if ((!(ioe instanceof AccessControlException)) &&
|
||||||
|
isSecurityException(ioe)) {
|
||||||
|
// Handle security exceptions.
|
||||||
|
// We do not handle AccessControlException here, since
|
||||||
|
// BlockReaderLocalLegacy#newBlockReader uses that exception to indicate
|
||||||
|
// that the user is not in dfs.block.local-path-access.user, a condition
|
||||||
|
// which requires us to disable legacy SCR.
|
||||||
|
throw ioe;
|
||||||
|
}
|
||||||
|
LOG.warn(this + ": error creating legacy BlockReaderLocal. " +
|
||||||
|
"Disabling legacy local reads.", ioe);
|
||||||
|
clientContext.setDisableLegacyBlockReaderLocal();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BlockReader getBlockReaderLocal() throws InvalidToken {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": trying to construct a BlockReaderLocal " +
|
||||||
|
"for short-circuit reads.");
|
||||||
|
}
|
||||||
|
if (pathInfo == null) {
|
||||||
|
pathInfo = clientContext.getDomainSocketFactory().
|
||||||
|
getPathInfo(inetSocketAddress, conf);
|
||||||
|
}
|
||||||
|
if (!pathInfo.getPathState().getUsableForShortCircuit()) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": " + pathInfo + " is not " +
|
||||||
|
"usable for short circuit; giving up on BlockReaderLocal.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
ShortCircuitCache cache = clientContext.getShortCircuitCache();
|
||||||
|
ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
|
||||||
|
ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
|
||||||
|
InvalidToken exc = info.getInvalidTokenException();
|
||||||
|
if (exc != null) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": got InvalidToken exception while trying to " +
|
||||||
|
"construct BlockReaderLocal via " + pathInfo.getPath());
|
||||||
|
}
|
||||||
|
throw exc;
|
||||||
|
}
|
||||||
|
if (info.getReplica() == null) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": failed to get ShortCircuitReplica. " +
|
||||||
|
"Cannot construct BlockReaderLocal via " + pathInfo.getPath());
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new BlockReaderLocal.Builder(conf).
|
||||||
|
setFilename(fileName).
|
||||||
|
setBlock(block).
|
||||||
|
setStartOffset(startOffset).
|
||||||
|
setShortCircuitReplica(info.getReplica()).
|
||||||
|
setDatanodeID(datanode).
|
||||||
|
setVerifyChecksum(verifyChecksum).
|
||||||
|
setCachingStrategy(cachingStrategy).
|
||||||
|
build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch a pair of short-circuit block descriptors from a local DataNode.
|
||||||
|
*
|
||||||
|
* @return Null if we could not communicate with the datanode,
|
||||||
|
* a new ShortCircuitReplicaInfo object otherwise.
|
||||||
|
* ShortCircuitReplicaInfo objects may contain either an InvalidToken
|
||||||
|
* exception, or a ShortCircuitReplica object ready to use.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
if (createShortCircuitReplicaInfoCallback != null) {
|
||||||
|
ShortCircuitReplicaInfo info =
|
||||||
|
createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo();
|
||||||
|
if (info != null) return info;
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": trying to create ShortCircuitReplicaInfo.");
|
||||||
|
}
|
||||||
|
BlockReaderPeer curPeer;
|
||||||
|
while (true) {
|
||||||
|
curPeer = nextDomainPeer();
|
||||||
|
if (curPeer == null) break;
|
||||||
|
DomainPeer peer = (DomainPeer)curPeer.peer;
|
||||||
|
try {
|
||||||
|
ShortCircuitReplicaInfo info = requestFileDescriptors(peer);
|
||||||
|
clientContext.getPeerCache().put(datanode, peer);
|
||||||
|
return info;
|
||||||
|
} catch (IOException e) {
|
||||||
|
if (curPeer.fromCache) {
|
||||||
|
// Handle an I/O error we got when using a cached socket.
|
||||||
|
// These are considered less serious, because the socket may be stale.
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(this + ": closing stale domain peer " + peer, e);
|
||||||
|
}
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
} else {
|
||||||
|
// Handle an I/O error we got when using a newly created socket.
|
||||||
|
// We temporarily disable the domain socket path for a few minutes in
|
||||||
|
// this case, to prevent wasting more time on it.
|
||||||
|
LOG.warn(this + ": I/O error requesting file descriptors. " +
|
||||||
|
"Disabling domain socket " + peer.getDomainSocket(), e);
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
clientContext.getDomainSocketFactory()
|
||||||
|
.disableDomainSocketPath(pathInfo.getPath());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Request file descriptors from a DomainPeer.
|
||||||
|
*
|
||||||
|
* @return A ShortCircuitReplica object if we could communicate with the
|
||||||
|
* datanode; null, otherwise.
|
||||||
|
* @throws IOException If we encountered an I/O exception while communicating
|
||||||
|
* with the datanode.
|
||||||
|
*/
|
||||||
|
private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer)
|
||||||
|
throws IOException {
|
||||||
final DataOutputStream out =
|
final DataOutputStream out =
|
||||||
new DataOutputStream(new BufferedOutputStream(
|
new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
|
||||||
peer.getOutputStream()));
|
new Sender(out).requestShortCircuitFds(block, token, 1);
|
||||||
new Sender(out).requestShortCircuitFds(block, blockToken, 1);
|
DataInputStream in = new DataInputStream(peer.getInputStream());
|
||||||
DataInputStream in =
|
|
||||||
new DataInputStream(peer.getInputStream());
|
|
||||||
BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
|
BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
|
||||||
PBHelper.vintPrefixed(in));
|
PBHelper.vintPrefixed(in));
|
||||||
DomainSocket sock = peer.getDomainSocket();
|
DomainSocket sock = peer.getDomainSocket();
|
||||||
switch (resp.getStatus()) {
|
switch (resp.getStatus()) {
|
||||||
case SUCCESS:
|
case SUCCESS:
|
||||||
BlockReaderLocal reader = null;
|
|
||||||
byte buf[] = new byte[1];
|
byte buf[] = new byte[1];
|
||||||
FileInputStream fis[] = new FileInputStream[2];
|
FileInputStream fis[] = new FileInputStream[2];
|
||||||
sock.recvFileInputStreams(fis, buf, 0, buf.length);
|
sock.recvFileInputStreams(fis, buf, 0, buf.length);
|
||||||
|
ShortCircuitReplica replica = null;
|
||||||
try {
|
try {
|
||||||
reader = new BlockReaderLocal.Builder(conf).
|
ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
|
||||||
setFilename(file).
|
replica = new ShortCircuitReplica(key, fis[0], fis[1],
|
||||||
setBlock(block).
|
clientContext.getShortCircuitCache(), Time.monotonicNow());
|
||||||
setStartOffset(startOffset).
|
} catch (IOException e) {
|
||||||
setStreams(fis).
|
// This indicates an error reading from disk, or a format error. Since
|
||||||
setDatanodeID(datanodeID).
|
// it's not a socket communication problem, we return null rather than
|
||||||
setVerifyChecksum(verifyChecksum).
|
// throwing an exception.
|
||||||
setBlockMetadataHeader(
|
LOG.warn(this + ": error creating ShortCircuitReplica.", e);
|
||||||
BlockMetadataHeader.preadHeader(fis[1].getChannel())).
|
return null;
|
||||||
setFileInputStreamCache(fisCache).
|
|
||||||
setCachingStrategy(cachingStrategy).
|
|
||||||
build();
|
|
||||||
} finally {
|
} finally {
|
||||||
if (reader == null) {
|
if (replica == null) {
|
||||||
IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
|
IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return reader;
|
return new ShortCircuitReplicaInfo(replica);
|
||||||
case ERROR_UNSUPPORTED:
|
case ERROR_UNSUPPORTED:
|
||||||
if (!resp.hasShortCircuitAccessVersion()) {
|
if (!resp.hasShortCircuitAccessVersion()) {
|
||||||
DFSClient.LOG.warn("short-circuit read access is disabled for " +
|
LOG.warn("short-circuit read access is disabled for " +
|
||||||
"DataNode " + datanodeID + ". reason: " + resp.getMessage());
|
"DataNode " + datanode + ". reason: " + resp.getMessage());
|
||||||
domSockFactory.disableShortCircuitForPath(sock.getPath());
|
clientContext.getDomainSocketFactory()
|
||||||
|
.disableShortCircuitForPath(pathInfo.getPath());
|
||||||
} else {
|
} else {
|
||||||
DFSClient.LOG.warn("short-circuit read access for the file " +
|
LOG.warn("short-circuit read access for the file " +
|
||||||
file + " is disabled for DataNode " + datanodeID +
|
fileName + " is disabled for DataNode " + datanode +
|
||||||
". reason: " + resp.getMessage());
|
". reason: " + resp.getMessage());
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
case ERROR_ACCESS_TOKEN:
|
case ERROR_ACCESS_TOKEN:
|
||||||
String msg = "access control error while " +
|
String msg = "access control error while " +
|
||||||
"attempting to set up short-circuit access to " +
|
"attempting to set up short-circuit access to " +
|
||||||
file + resp.getMessage();
|
fileName + resp.getMessage();
|
||||||
DFSClient.LOG.debug(msg);
|
if (LOG.isDebugEnabled()) {
|
||||||
throw new InvalidBlockTokenException(msg);
|
LOG.debug(this + ":" + msg);
|
||||||
|
}
|
||||||
|
return new ShortCircuitReplicaInfo(new InvalidToken(msg));
|
||||||
default:
|
default:
|
||||||
DFSClient.LOG.warn("error while attempting to set up short-circuit " +
|
LOG.warn(this + "unknown response code " + resp.getStatus() + " while " +
|
||||||
"access to " + file + ": " + resp.getMessage());
|
"attempting to set up short-circuit access. " + resp.getMessage());
|
||||||
domSockFactory.disableShortCircuitForPath(sock.getPath());
|
clientContext.getDomainSocketFactory()
|
||||||
|
.disableShortCircuitForPath(pathInfo.getPath());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a RemoteBlockReader that communicates over a UNIX domain socket.
|
||||||
|
*
|
||||||
|
* @return The new BlockReader, or null if we failed to create the block
|
||||||
|
* reader.
|
||||||
|
*
|
||||||
|
* @throws InvalidToken If the block token was invalid.
|
||||||
|
* Potentially other security-related execptions.
|
||||||
|
*/
|
||||||
|
private BlockReader getRemoteBlockReaderFromDomain() throws IOException {
|
||||||
|
if (pathInfo == null) {
|
||||||
|
pathInfo = clientContext.getDomainSocketFactory().
|
||||||
|
getPathInfo(inetSocketAddress, conf);
|
||||||
|
}
|
||||||
|
if (!pathInfo.getPathState().getUsableForDataTransfer()) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": not trying to create a remote block reader " +
|
||||||
|
"because the UNIX domain socket at " + pathInfo +
|
||||||
|
" is not usable.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": trying to create a remote block reader from the " +
|
||||||
|
"UNIX domain socket at " + pathInfo.getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
BlockReaderPeer curPeer = nextDomainPeer();
|
||||||
|
if (curPeer == null) break;
|
||||||
|
DomainPeer peer = (DomainPeer)curPeer.peer;
|
||||||
|
BlockReader blockReader = null;
|
||||||
|
try {
|
||||||
|
blockReader = getRemoteBlockReader(peer);
|
||||||
|
return blockReader;
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
if (isSecurityException(ioe)) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": got security exception while constructing " +
|
||||||
|
"a remote block reader from the unix domain socket at " +
|
||||||
|
pathInfo.getPath(), ioe);
|
||||||
|
}
|
||||||
|
throw ioe;
|
||||||
|
}
|
||||||
|
if (curPeer.fromCache) {
|
||||||
|
// Handle an I/O error we got when using a cached peer. These are
|
||||||
|
// considered less serious, because the underlying socket may be stale.
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Closed potentially stale domain peer " + peer, ioe);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Handle an I/O error we got when using a newly created domain peer.
|
||||||
|
// We temporarily disable the domain socket path for a few minutes in
|
||||||
|
// this case, to prevent wasting more time on it.
|
||||||
|
LOG.warn("I/O error constructing remote block reader. Disabling " +
|
||||||
|
"domain socket " + peer.getDomainSocket(), ioe);
|
||||||
|
clientContext.getDomainSocketFactory()
|
||||||
|
.disableDomainSocketPath(pathInfo.getPath());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (blockReader == null) {
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a RemoteBlockReader that communicates over a TCP socket.
|
||||||
|
*
|
||||||
|
* @return The new BlockReader. We will not return null, but instead throw
|
||||||
|
* an exception if this fails.
|
||||||
|
*
|
||||||
|
* @throws InvalidToken
|
||||||
|
* If the block token was invalid.
|
||||||
|
* InvalidEncryptionKeyException
|
||||||
|
* If the encryption key was invalid.
|
||||||
|
* Other IOException
|
||||||
|
* If there was another problem.
|
||||||
|
*/
|
||||||
|
private BlockReader getRemoteBlockReaderFromTcp() throws IOException {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": trying to create a remote block reader from a " +
|
||||||
|
"TCP socket");
|
||||||
|
}
|
||||||
|
BlockReader blockReader = null;
|
||||||
|
while (true) {
|
||||||
|
BlockReaderPeer curPeer = null;
|
||||||
|
Peer peer = null;
|
||||||
|
try {
|
||||||
|
curPeer = nextTcpPeer();
|
||||||
|
if (curPeer == null) break;
|
||||||
|
peer = curPeer.peer;
|
||||||
|
blockReader = getRemoteBlockReader(peer);
|
||||||
|
return blockReader;
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
if (isSecurityException(ioe)) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": got security exception while constructing " +
|
||||||
|
"a remote block reader from " + peer, ioe);
|
||||||
|
}
|
||||||
|
throw ioe;
|
||||||
|
}
|
||||||
|
if ((curPeer != null) && curPeer.fromCache) {
|
||||||
|
// Handle an I/O error we got when using a cached peer. These are
|
||||||
|
// considered less serious, because the underlying socket may be
|
||||||
|
// stale.
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Closed potentially stale remote peer " + peer, ioe);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Handle an I/O error we got when using a newly created peer.
|
||||||
|
LOG.warn("I/O error constructing remote block reader.", ioe);
|
||||||
|
throw ioe;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (blockReader == null) {
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class BlockReaderPeer {
|
||||||
|
final Peer peer;
|
||||||
|
final boolean fromCache;
|
||||||
|
|
||||||
|
BlockReaderPeer(Peer peer, boolean fromCache) {
|
||||||
|
this.peer = peer;
|
||||||
|
this.fromCache = fromCache;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next DomainPeer-- either from the cache or by creating it.
|
||||||
|
*
|
||||||
|
* @return the next DomainPeer, or null if we could not construct one.
|
||||||
|
*/
|
||||||
|
private BlockReaderPeer nextDomainPeer() {
|
||||||
|
if (remainingCacheTries > 0) {
|
||||||
|
Peer peer = clientContext.getPeerCache().get(datanode, true);
|
||||||
|
if (peer != null) {
|
||||||
|
remainingCacheTries--;
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("nextDomainPeer: reusing existing peer " + peer);
|
||||||
|
}
|
||||||
|
return new BlockReaderPeer(peer, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DomainSocket sock = clientContext.getDomainSocketFactory().
|
||||||
|
createSocket(pathInfo, conf.socketTimeout);
|
||||||
|
if (sock == null) return null;
|
||||||
|
return new BlockReaderPeer(new DomainPeer(sock), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next TCP-based peer-- either from the cache or by creating it.
|
||||||
|
*
|
||||||
|
* @return the next Peer, or null if we could not construct one.
|
||||||
|
*
|
||||||
|
* @throws IOException If there was an error while constructing the peer
|
||||||
|
* (such as an InvalidEncryptionKeyException)
|
||||||
|
*/
|
||||||
|
private BlockReaderPeer nextTcpPeer() throws IOException {
|
||||||
|
if (remainingCacheTries > 0) {
|
||||||
|
Peer peer = clientContext.getPeerCache().get(datanode, false);
|
||||||
|
if (peer != null) {
|
||||||
|
remainingCacheTries--;
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("nextTcpPeer: reusing existing peer " + peer);
|
||||||
|
}
|
||||||
|
return new BlockReaderPeer(peer, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress);
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("nextTcpPeer: created newConnectedPeer " + peer);
|
||||||
|
}
|
||||||
|
return new BlockReaderPeer(peer, false);
|
||||||
|
} catch (IOException e) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("nextTcpPeer: failed to create newConnectedPeer " +
|
||||||
|
"connected to " + datanode);
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine if an exception is security-related.
|
||||||
|
*
|
||||||
|
* We need to handle these exceptions differently than other IOExceptions.
|
||||||
|
* They don't indicate a communication problem. Instead, they mean that there
|
||||||
|
* is some action the client needs to take, such as refetching block tokens,
|
||||||
|
* renewing encryption keys, etc.
|
||||||
|
*
|
||||||
|
* @param ioe The exception
|
||||||
|
* @return True only if the exception is security-related.
|
||||||
|
*/
|
||||||
|
private static boolean isSecurityException(IOException ioe) {
|
||||||
|
return (ioe instanceof InvalidToken) ||
|
||||||
|
(ioe instanceof InvalidEncryptionKeyException) ||
|
||||||
|
(ioe instanceof InvalidBlockTokenException) ||
|
||||||
|
(ioe instanceof AccessControlException);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
|
private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
|
||||||
|
if (conf.useLegacyBlockReader) {
|
||||||
|
return RemoteBlockReader.newBlockReader(fileName,
|
||||||
|
block, token, startOffset, length, conf.ioBufferSize,
|
||||||
|
verifyChecksum, clientName, peer, datanode,
|
||||||
|
clientContext.getPeerCache(), cachingStrategy);
|
||||||
|
} else {
|
||||||
|
return RemoteBlockReader2.newBlockReader(
|
||||||
|
fileName, block, token, startOffset, length,
|
||||||
|
verifyChecksum, clientName, peer, datanode,
|
||||||
|
clientContext.getPeerCache(), cachingStrategy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")";
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File name to print when accessing a block directly (from servlets)
|
* File name to print when accessing a block directly (from servlets)
|
||||||
* @param s Address of the block location
|
* @param s Address of the block location
|
||||||
|
@ -246,23 +777,4 @@ public static String getFileName(final InetSocketAddress s,
|
||||||
final String poolId, final long blockId) {
|
final String poolId, final long blockId) {
|
||||||
return s.toString() + ":" + poolId + ":" + blockId;
|
return s.toString() + ":" + poolId + ":" + blockId;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get {@link BlockReaderLocalLegacy} for short circuited local reads.
|
|
||||||
* This block reader implements the path-based style of local reads
|
|
||||||
* first introduced in HDFS-2246.
|
|
||||||
*/
|
|
||||||
static BlockReader getLegacyBlockReaderLocal(DFSClient dfsClient,
|
|
||||||
String src, ExtendedBlock blk,
|
|
||||||
Token<BlockTokenIdentifier> accessToken, DatanodeInfo chosenNode,
|
|
||||||
long offsetIntoBlock) throws InvalidToken, IOException {
|
|
||||||
try {
|
|
||||||
final long length = blk.getNumBytes() - offsetIntoBlock;
|
|
||||||
return BlockReaderLocalLegacy.newBlockReader(dfsClient, src, blk,
|
|
||||||
accessToken, chosenNode, offsetIntoBlock, length);
|
|
||||||
} catch (RemoteException re) {
|
|
||||||
throw re.unwrapRemoteException(InvalidToken.class,
|
|
||||||
AccessControlException.class);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,8 +28,9 @@
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
||||||
|
@ -67,12 +68,10 @@ public static class Builder {
|
||||||
private boolean verifyChecksum;
|
private boolean verifyChecksum;
|
||||||
private int maxReadahead;
|
private int maxReadahead;
|
||||||
private String filename;
|
private String filename;
|
||||||
private FileInputStream streams[];
|
private ShortCircuitReplica replica;
|
||||||
private long dataPos;
|
private long dataPos;
|
||||||
private DatanodeID datanodeID;
|
private DatanodeID datanodeID;
|
||||||
private FileInputStreamCache fisCache;
|
|
||||||
private boolean mlocked;
|
private boolean mlocked;
|
||||||
private BlockMetadataHeader header;
|
|
||||||
private ExtendedBlock block;
|
private ExtendedBlock block;
|
||||||
|
|
||||||
public Builder(Conf conf) {
|
public Builder(Conf conf) {
|
||||||
|
@ -99,8 +98,8 @@ public Builder setFilename(String filename) {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder setStreams(FileInputStream streams[]) {
|
public Builder setShortCircuitReplica(ShortCircuitReplica replica) {
|
||||||
this.streams = streams;
|
this.replica = replica;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,30 +113,18 @@ public Builder setDatanodeID(DatanodeID datanodeID) {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder setFileInputStreamCache(FileInputStreamCache fisCache) {
|
|
||||||
this.fisCache = fisCache;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Builder setMlocked(boolean mlocked) {
|
public Builder setMlocked(boolean mlocked) {
|
||||||
this.mlocked = mlocked;
|
this.mlocked = mlocked;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder setBlockMetadataHeader(BlockMetadataHeader header) {
|
|
||||||
this.header = header;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Builder setBlock(ExtendedBlock block) {
|
public Builder setBlock(ExtendedBlock block) {
|
||||||
this.block = block;
|
this.block = block;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BlockReaderLocal build() {
|
public BlockReaderLocal build() {
|
||||||
Preconditions.checkNotNull(streams);
|
Preconditions.checkNotNull(replica);
|
||||||
Preconditions.checkArgument(streams.length == 2);
|
|
||||||
Preconditions.checkNotNull(header);
|
|
||||||
return new BlockReaderLocal(this);
|
return new BlockReaderLocal(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -147,7 +134,7 @@ public BlockReaderLocal build() {
|
||||||
/**
|
/**
|
||||||
* Pair of streams for this block.
|
* Pair of streams for this block.
|
||||||
*/
|
*/
|
||||||
private final FileInputStream streams[];
|
private final ShortCircuitReplica replica;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The data FileChannel.
|
* The data FileChannel.
|
||||||
|
@ -207,12 +194,6 @@ public BlockReaderLocal build() {
|
||||||
*/
|
*/
|
||||||
private int checksumSize;
|
private int checksumSize;
|
||||||
|
|
||||||
/**
|
|
||||||
* FileInputStream cache to return the streams to upon closing,
|
|
||||||
* or null if we should just close them unconditionally.
|
|
||||||
*/
|
|
||||||
private final FileInputStreamCache fisCache;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum number of chunks to allocate.
|
* Maximum number of chunks to allocate.
|
||||||
*
|
*
|
||||||
|
@ -257,20 +238,18 @@ public BlockReaderLocal build() {
|
||||||
*/
|
*/
|
||||||
private ByteBuffer checksumBuf;
|
private ByteBuffer checksumBuf;
|
||||||
|
|
||||||
private boolean mmapDisabled = false;
|
|
||||||
|
|
||||||
private BlockReaderLocal(Builder builder) {
|
private BlockReaderLocal(Builder builder) {
|
||||||
this.streams = builder.streams;
|
this.replica = builder.replica;
|
||||||
this.dataIn = builder.streams[0].getChannel();
|
this.dataIn = replica.getDataStream().getChannel();
|
||||||
this.dataPos = builder.dataPos;
|
this.dataPos = builder.dataPos;
|
||||||
this.checksumIn = builder.streams[1].getChannel();
|
this.checksumIn = replica.getMetaStream().getChannel();
|
||||||
this.checksum = builder.header.getChecksum();
|
BlockMetadataHeader header = builder.replica.getMetaHeader();
|
||||||
|
this.checksum = header.getChecksum();
|
||||||
this.verifyChecksum = builder.verifyChecksum &&
|
this.verifyChecksum = builder.verifyChecksum &&
|
||||||
(this.checksum.getChecksumType().id != DataChecksum.CHECKSUM_NULL);
|
(this.checksum.getChecksumType().id != DataChecksum.CHECKSUM_NULL);
|
||||||
this.mlocked = new AtomicBoolean(builder.mlocked);
|
this.mlocked = new AtomicBoolean(builder.mlocked);
|
||||||
this.filename = builder.filename;
|
this.filename = builder.filename;
|
||||||
this.datanodeID = builder.datanodeID;
|
this.datanodeID = builder.datanodeID;
|
||||||
this.fisCache = builder.fisCache;
|
|
||||||
this.block = builder.block;
|
this.block = builder.block;
|
||||||
this.bytesPerChecksum = checksum.getBytesPerChecksum();
|
this.bytesPerChecksum = checksum.getBytesPerChecksum();
|
||||||
this.checksumSize = checksum.getChecksumSize();
|
this.checksumSize = checksum.getChecksumSize();
|
||||||
|
@ -642,20 +621,7 @@ public synchronized void close() throws IOException {
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("close(filename=" + filename + ", block=" + block + ")");
|
LOG.trace("close(filename=" + filename + ", block=" + block + ")");
|
||||||
}
|
}
|
||||||
if (clientMmap != null) {
|
replica.unref();
|
||||||
clientMmap.unref();
|
|
||||||
clientMmap = null;
|
|
||||||
}
|
|
||||||
if (fisCache != null) {
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("putting FileInputStream for " + filename +
|
|
||||||
" back into FileInputStreamCache");
|
|
||||||
}
|
|
||||||
fisCache.put(datanodeID, block, streams);
|
|
||||||
} else {
|
|
||||||
LOG.debug("closing FileInputStream for " + filename);
|
|
||||||
IOUtils.cleanup(LOG, dataIn, checksumIn);
|
|
||||||
}
|
|
||||||
freeDataBufIfExists();
|
freeDataBufIfExists();
|
||||||
freeChecksumBufIfExists();
|
freeChecksumBufIfExists();
|
||||||
}
|
}
|
||||||
|
@ -683,8 +649,7 @@ public boolean isShortCircuit() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
public ClientMmap getClientMmap(EnumSet<ReadOption> opts) {
|
||||||
ClientMmapManager mmapManager) {
|
|
||||||
if ((!opts.contains(ReadOption.SKIP_CHECKSUMS)) &&
|
if ((!opts.contains(ReadOption.SKIP_CHECKSUMS)) &&
|
||||||
verifyChecksum && (!mlocked.get())) {
|
verifyChecksum && (!mlocked.get())) {
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
|
@ -694,27 +659,7 @@ public synchronized ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (clientMmap == null) {
|
return replica.getOrCreateClientMmap();
|
||||||
if (mmapDisabled) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
clientMmap = mmapManager.fetch(datanodeID, block, streams[0]);
|
|
||||||
if (clientMmap == null) {
|
|
||||||
mmapDisabled = true;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
LOG.error("Interrupted while setting up mmap for " + filename, e);
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
return null;
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOG.error("unable to set up mmap for " + filename, e);
|
|
||||||
mmapDisabled = true;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return clientMmap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -31,7 +31,6 @@
|
||||||
|
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
@ -175,19 +174,21 @@ private void removeBlockLocalPathInfo(ExtendedBlock b) {
|
||||||
/**
|
/**
|
||||||
* The only way this object can be instantiated.
|
* The only way this object can be instantiated.
|
||||||
*/
|
*/
|
||||||
static BlockReaderLocalLegacy newBlockReader(DFSClient dfsClient,
|
static BlockReaderLocalLegacy newBlockReader(DFSClient.Conf conf,
|
||||||
String file, ExtendedBlock blk, Token<BlockTokenIdentifier> token,
|
UserGroupInformation userGroupInformation,
|
||||||
DatanodeInfo node, long startOffset, long length)
|
Configuration configuration, String file, ExtendedBlock blk,
|
||||||
throws IOException {
|
Token<BlockTokenIdentifier> token, DatanodeInfo node,
|
||||||
final DFSClient.Conf conf = dfsClient.getConf();
|
long startOffset, long length) throws IOException {
|
||||||
|
|
||||||
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
|
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
|
||||||
.getIpcPort());
|
.getIpcPort());
|
||||||
// check the cache first
|
// check the cache first
|
||||||
BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
|
BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
|
||||||
if (pathinfo == null) {
|
if (pathinfo == null) {
|
||||||
pathinfo = getBlockPathInfo(dfsClient.ugi, blk, node,
|
if (userGroupInformation == null) {
|
||||||
dfsClient.getConfiguration(), dfsClient.getHdfsTimeout(), token,
|
userGroupInformation = UserGroupInformation.getCurrentUser();
|
||||||
|
}
|
||||||
|
pathinfo = getBlockPathInfo(userGroupInformation, blk, node,
|
||||||
|
configuration, conf.hdfsTimeout, token,
|
||||||
conf.connectToDnViaHostname);
|
conf.connectToDnViaHostname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -708,8 +709,7 @@ public boolean isShortCircuit() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
public ClientMmap getClientMmap(EnumSet<ReadOption> opts) {
|
||||||
ClientMmapManager mmapManager) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,204 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
|
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ClientContext contains context information for a client.
|
||||||
|
*
|
||||||
|
* This allows us to share caches such as the socket cache across
|
||||||
|
* DFSClient instances.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class ClientContext {
|
||||||
|
private static final Log LOG = LogFactory.getLog(ClientContext.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Global map of context names to caches contexts.
|
||||||
|
*/
|
||||||
|
private final static HashMap<String, ClientContext> CACHES =
|
||||||
|
new HashMap<String, ClientContext>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Name of context.
|
||||||
|
*/
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String representation of the configuration.
|
||||||
|
*/
|
||||||
|
private final String confString;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caches short-circuit file descriptors, mmap regions.
|
||||||
|
*/
|
||||||
|
private final ShortCircuitCache shortCircuitCache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caches TCP and UNIX domain sockets for reuse.
|
||||||
|
*/
|
||||||
|
private final PeerCache peerCache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores information about socket paths.
|
||||||
|
*/
|
||||||
|
private final DomainSocketFactory domainSocketFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if we should use the legacy BlockReaderLocal.
|
||||||
|
*/
|
||||||
|
private final boolean useLegacyBlockReaderLocal;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if the legacy BlockReaderLocal is disabled.
|
||||||
|
*
|
||||||
|
* The legacy block reader local gets disabled completely whenever there is an
|
||||||
|
* error or miscommunication. The new block reader local code handles this
|
||||||
|
* case more gracefully inside DomainSocketFactory.
|
||||||
|
*/
|
||||||
|
private volatile boolean disableLegacyBlockReaderLocal = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not we complained about a DFSClient fetching a CacheContext that
|
||||||
|
* didn't match its config values yet.
|
||||||
|
*/
|
||||||
|
private boolean printedConfWarning = false;
|
||||||
|
|
||||||
|
private ClientContext(String name, Conf conf) {
|
||||||
|
this.name = name;
|
||||||
|
this.confString = confAsString(conf);
|
||||||
|
this.shortCircuitCache = new ShortCircuitCache(
|
||||||
|
conf.shortCircuitStreamsCacheSize,
|
||||||
|
conf.shortCircuitStreamsCacheExpiryMs,
|
||||||
|
conf.shortCircuitMmapCacheSize,
|
||||||
|
conf.shortCircuitMmapCacheExpiryMs,
|
||||||
|
conf.shortCircuitMmapCacheRetryTimeout,
|
||||||
|
conf.shortCircuitCacheStaleThresholdMs);
|
||||||
|
this.peerCache =
|
||||||
|
new PeerCache(conf.socketCacheCapacity, conf.socketCacheExpiry);
|
||||||
|
this.useLegacyBlockReaderLocal = conf.useLegacyBlockReaderLocal;
|
||||||
|
this.domainSocketFactory = new DomainSocketFactory(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String confAsString(Conf conf) {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append("shortCircuitStreamsCacheSize = ").
|
||||||
|
append(conf.shortCircuitStreamsCacheSize).
|
||||||
|
append(", shortCircuitStreamsCacheExpiryMs = ").
|
||||||
|
append(conf.shortCircuitStreamsCacheExpiryMs).
|
||||||
|
append(", shortCircuitMmapCacheSize = ").
|
||||||
|
append(conf.shortCircuitMmapCacheSize).
|
||||||
|
append(", shortCircuitMmapCacheExpiryMs = ").
|
||||||
|
append(conf.shortCircuitMmapCacheExpiryMs).
|
||||||
|
append(", shortCircuitMmapCacheRetryTimeout = ").
|
||||||
|
append(conf.shortCircuitMmapCacheRetryTimeout).
|
||||||
|
append(", shortCircuitCacheStaleThresholdMs = ").
|
||||||
|
append(conf.shortCircuitCacheStaleThresholdMs).
|
||||||
|
append(", socketCacheCapacity = ").
|
||||||
|
append(conf.socketCacheCapacity).
|
||||||
|
append(", socketCacheExpiry = ").
|
||||||
|
append(conf.socketCacheExpiry).
|
||||||
|
append(", shortCircuitLocalReads = ").
|
||||||
|
append(conf.shortCircuitLocalReads).
|
||||||
|
append(", useLegacyBlockReaderLocal = ").
|
||||||
|
append(conf.useLegacyBlockReaderLocal).
|
||||||
|
append(", domainSocketDataTraffic = ").
|
||||||
|
append(conf.domainSocketDataTraffic);
|
||||||
|
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ClientContext get(String name, Conf conf) {
|
||||||
|
ClientContext context;
|
||||||
|
synchronized(ClientContext.class) {
|
||||||
|
context = CACHES.get(name);
|
||||||
|
if (context == null) {
|
||||||
|
context = new ClientContext(name, conf);
|
||||||
|
CACHES.put(name, context);
|
||||||
|
} else {
|
||||||
|
context.printConfWarningIfNeeded(conf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a client context, from a Configuration object.
|
||||||
|
*
|
||||||
|
* This method is less efficient than the version which takes a DFSClient#Conf
|
||||||
|
* object, and should be mostly used by tests.
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
public static ClientContext getFromConf(Configuration conf) {
|
||||||
|
return get(conf.get(DFSConfigKeys.DFS_CLIENT_CONTEXT,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_CONTEXT_DEFAULT),
|
||||||
|
new DFSClient.Conf(conf));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printConfWarningIfNeeded(Conf conf) {
|
||||||
|
String existing = this.getConfString();
|
||||||
|
String requested = confAsString(conf);
|
||||||
|
if (!existing.equals(requested)) {
|
||||||
|
if (!printedConfWarning) {
|
||||||
|
printedConfWarning = true;
|
||||||
|
LOG.warn("Existing client context '" + name + "' does not match " +
|
||||||
|
"requested configuration. Existing: " + existing +
|
||||||
|
", Requested: " + requested);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getConfString() {
|
||||||
|
return confString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShortCircuitCache getShortCircuitCache() {
|
||||||
|
return shortCircuitCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PeerCache getPeerCache() {
|
||||||
|
return peerCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getUseLegacyBlockReaderLocal() {
|
||||||
|
return useLegacyBlockReaderLocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getDisableLegacyBlockReaderLocal() {
|
||||||
|
return disableLegacyBlockReaderLocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDisableLegacyBlockReaderLocal() {
|
||||||
|
disableLegacyBlockReaderLocal = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DomainSocketFactory getDomainSocketFactory() {
|
||||||
|
return domainSocketFactory;
|
||||||
|
}
|
||||||
|
}
|
|
@ -56,6 +56,8 @@
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT_DEFAULT;
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
import java.io.BufferedOutputStream;
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
|
@ -108,9 +110,10 @@
|
||||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.fs.VolumeId;
|
import org.apache.hadoop.fs.VolumeId;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
|
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
|
||||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
|
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
|
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
|
||||||
|
@ -191,7 +194,7 @@
|
||||||
*
|
*
|
||||||
********************************************************/
|
********************************************************/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class DFSClient implements java.io.Closeable {
|
public class DFSClient implements java.io.Closeable, RemotePeerFactory {
|
||||||
public static final Log LOG = LogFactory.getLog(DFSClient.class);
|
public static final Log LOG = LogFactory.getLog(DFSClient.class);
|
||||||
public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour
|
public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour
|
||||||
static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
|
static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
|
||||||
|
@ -212,49 +215,12 @@ public class DFSClient implements java.io.Closeable {
|
||||||
final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
|
final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
|
||||||
final FileSystem.Statistics stats;
|
final FileSystem.Statistics stats;
|
||||||
private final String authority;
|
private final String authority;
|
||||||
final PeerCache peerCache;
|
|
||||||
private Random r = new Random();
|
private Random r = new Random();
|
||||||
private SocketAddress[] localInterfaceAddrs;
|
private SocketAddress[] localInterfaceAddrs;
|
||||||
private DataEncryptionKey encryptionKey;
|
private DataEncryptionKey encryptionKey;
|
||||||
private boolean shouldUseLegacyBlockReaderLocal;
|
|
||||||
private final CachingStrategy defaultReadCachingStrategy;
|
private final CachingStrategy defaultReadCachingStrategy;
|
||||||
private final CachingStrategy defaultWriteCachingStrategy;
|
private final CachingStrategy defaultWriteCachingStrategy;
|
||||||
private ClientMmapManager mmapManager;
|
private final ClientContext clientContext;
|
||||||
|
|
||||||
private static final ClientMmapManagerFactory MMAP_MANAGER_FACTORY =
|
|
||||||
new ClientMmapManagerFactory();
|
|
||||||
|
|
||||||
private static final class ClientMmapManagerFactory {
|
|
||||||
private ClientMmapManager mmapManager = null;
|
|
||||||
/**
|
|
||||||
* Tracks the number of users of mmapManager.
|
|
||||||
*/
|
|
||||||
private int refcnt = 0;
|
|
||||||
|
|
||||||
synchronized ClientMmapManager get(Configuration conf) {
|
|
||||||
if (refcnt++ == 0) {
|
|
||||||
mmapManager = ClientMmapManager.fromConf(conf);
|
|
||||||
} else {
|
|
||||||
String mismatches = mmapManager.verifyConfigurationMatches(conf);
|
|
||||||
if (!mismatches.isEmpty()) {
|
|
||||||
LOG.warn("The ClientMmapManager settings you specified " +
|
|
||||||
"have been ignored because another thread created the " +
|
|
||||||
"ClientMmapManager first. " + mismatches);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return mmapManager;
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized void unref(ClientMmapManager mmapManager) {
|
|
||||||
if (this.mmapManager != mmapManager) {
|
|
||||||
throw new IllegalArgumentException();
|
|
||||||
}
|
|
||||||
if (--refcnt == 0) {
|
|
||||||
IOUtils.cleanup(LOG, mmapManager);
|
|
||||||
mmapManager = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DFSClient configuration
|
* DFSClient configuration
|
||||||
|
@ -301,6 +267,11 @@ public static class Conf {
|
||||||
final int shortCircuitStreamsCacheSize;
|
final int shortCircuitStreamsCacheSize;
|
||||||
final long shortCircuitStreamsCacheExpiryMs;
|
final long shortCircuitStreamsCacheExpiryMs;
|
||||||
|
|
||||||
|
final int shortCircuitMmapCacheSize;
|
||||||
|
final long shortCircuitMmapCacheExpiryMs;
|
||||||
|
final long shortCircuitMmapCacheRetryTimeout;
|
||||||
|
final long shortCircuitCacheStaleThresholdMs;
|
||||||
|
|
||||||
public Conf(Configuration conf) {
|
public Conf(Configuration conf) {
|
||||||
// The hdfsTimeout is currently the same as the ipc timeout
|
// The hdfsTimeout is currently the same as the ipc timeout
|
||||||
hdfsTimeout = Client.getTimeout(conf);
|
hdfsTimeout = Client.getTimeout(conf);
|
||||||
|
@ -416,6 +387,18 @@ public Conf(Configuration conf) {
|
||||||
shortCircuitStreamsCacheExpiryMs = conf.getLong(
|
shortCircuitStreamsCacheExpiryMs = conf.getLong(
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
|
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
|
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
|
||||||
|
shortCircuitMmapCacheSize = conf.getInt(
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
|
||||||
|
shortCircuitMmapCacheExpiryMs = conf.getLong(
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
|
||||||
|
shortCircuitMmapCacheRetryTimeout = conf.getLong(
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS_DEFAULT);
|
||||||
|
shortCircuitCacheStaleThresholdMs = conf.getLong(
|
||||||
|
DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
private DataChecksum.Type getChecksumType(Configuration conf) {
|
private DataChecksum.Type getChecksumType(Configuration conf) {
|
||||||
|
@ -477,8 +460,6 @@ Configuration getConfiguration() {
|
||||||
private final Map<String, DFSOutputStream> filesBeingWritten
|
private final Map<String, DFSOutputStream> filesBeingWritten
|
||||||
= new HashMap<String, DFSOutputStream>();
|
= new HashMap<String, DFSOutputStream>();
|
||||||
|
|
||||||
private final DomainSocketFactory domainSocketFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Same as this(NameNode.getAddress(conf), conf);
|
* Same as this(NameNode.getAddress(conf), conf);
|
||||||
* @see #DFSClient(InetSocketAddress, Configuration)
|
* @see #DFSClient(InetSocketAddress, Configuration)
|
||||||
|
@ -526,8 +507,6 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// Copy only the required DFSClient configuration
|
// Copy only the required DFSClient configuration
|
||||||
this.dfsClientConf = new Conf(conf);
|
this.dfsClientConf = new Conf(conf);
|
||||||
this.shouldUseLegacyBlockReaderLocal =
|
|
||||||
this.dfsClientConf.useLegacyBlockReaderLocal;
|
|
||||||
if (this.dfsClientConf.useLegacyBlockReaderLocal) {
|
if (this.dfsClientConf.useLegacyBlockReaderLocal) {
|
||||||
LOG.debug("Using legacy short-circuit local reads.");
|
LOG.debug("Using legacy short-circuit local reads.");
|
||||||
}
|
}
|
||||||
|
@ -572,9 +551,6 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
|
||||||
this.namenode = proxyInfo.getProxy();
|
this.namenode = proxyInfo.getProxy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// read directly from the block file if configured.
|
|
||||||
this.domainSocketFactory = new DomainSocketFactory(dfsClientConf);
|
|
||||||
|
|
||||||
String localInterfaces[] =
|
String localInterfaces[] =
|
||||||
conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
|
conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
|
||||||
localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
|
localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
|
||||||
|
@ -584,7 +560,6 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
|
||||||
Joiner.on(',').join(localInterfaceAddrs) + "]");
|
Joiner.on(',').join(localInterfaceAddrs) + "]");
|
||||||
}
|
}
|
||||||
|
|
||||||
this.peerCache = PeerCache.getInstance(dfsClientConf.socketCacheCapacity, dfsClientConf.socketCacheExpiry);
|
|
||||||
Boolean readDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_READS) == null) ?
|
Boolean readDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_READS) == null) ?
|
||||||
null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_READS, false);
|
null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_READS, false);
|
||||||
Long readahead = (conf.get(DFS_CLIENT_CACHE_READAHEAD) == null) ?
|
Long readahead = (conf.get(DFS_CLIENT_CACHE_READAHEAD) == null) ?
|
||||||
|
@ -595,7 +570,9 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
|
||||||
new CachingStrategy(readDropBehind, readahead);
|
new CachingStrategy(readDropBehind, readahead);
|
||||||
this.defaultWriteCachingStrategy =
|
this.defaultWriteCachingStrategy =
|
||||||
new CachingStrategy(writeDropBehind, readahead);
|
new CachingStrategy(writeDropBehind, readahead);
|
||||||
this.mmapManager = MMAP_MANAGER_FACTORY.get(conf);
|
this.clientContext = ClientContext.get(
|
||||||
|
conf.get(DFS_CLIENT_CONTEXT, DFS_CLIENT_CONTEXT_DEFAULT),
|
||||||
|
dfsClientConf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -800,10 +777,6 @@ void closeConnectionToNamenode() {
|
||||||
|
|
||||||
/** Abort and release resources held. Ignore all errors. */
|
/** Abort and release resources held. Ignore all errors. */
|
||||||
void abort() {
|
void abort() {
|
||||||
if (mmapManager != null) {
|
|
||||||
MMAP_MANAGER_FACTORY.unref(mmapManager);
|
|
||||||
mmapManager = null;
|
|
||||||
}
|
|
||||||
clientRunning = false;
|
clientRunning = false;
|
||||||
closeAllFilesBeingWritten(true);
|
closeAllFilesBeingWritten(true);
|
||||||
try {
|
try {
|
||||||
|
@ -849,10 +822,6 @@ private void closeAllFilesBeingWritten(final boolean abort) {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public synchronized void close() throws IOException {
|
public synchronized void close() throws IOException {
|
||||||
if (mmapManager != null) {
|
|
||||||
MMAP_MANAGER_FACTORY.unref(mmapManager);
|
|
||||||
mmapManager = null;
|
|
||||||
}
|
|
||||||
if(clientRunning) {
|
if(clientRunning) {
|
||||||
closeAllFilesBeingWritten(false);
|
closeAllFilesBeingWritten(false);
|
||||||
clientRunning = false;
|
clientRunning = false;
|
||||||
|
@ -2626,18 +2595,6 @@ public String toString() {
|
||||||
+ ", ugi=" + ugi + "]";
|
+ ", ugi=" + ugi + "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
public DomainSocketFactory getDomainSocketFactory() {
|
|
||||||
return domainSocketFactory;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void disableLegacyBlockReaderLocal() {
|
|
||||||
shouldUseLegacyBlockReaderLocal = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean useLegacyBlockReaderLocal() {
|
|
||||||
return shouldUseLegacyBlockReaderLocal;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CachingStrategy getDefaultReadCachingStrategy() {
|
public CachingStrategy getDefaultReadCachingStrategy() {
|
||||||
return defaultReadCachingStrategy;
|
return defaultReadCachingStrategy;
|
||||||
}
|
}
|
||||||
|
@ -2646,8 +2603,29 @@ public CachingStrategy getDefaultWriteCachingStrategy() {
|
||||||
return defaultWriteCachingStrategy;
|
return defaultWriteCachingStrategy;
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
public ClientContext getClientContext() {
|
||||||
public ClientMmapManager getMmapManager() {
|
return clientContext;
|
||||||
return mmapManager;
|
}
|
||||||
|
|
||||||
|
@Override // RemotePeerFactory
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr) throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
boolean success = false;
|
||||||
|
Socket sock = null;
|
||||||
|
try {
|
||||||
|
sock = socketFactory.createSocket();
|
||||||
|
NetUtils.connect(sock, addr,
|
||||||
|
getRandomLocalInterfaceAddr(),
|
||||||
|
dfsClientConf.socketTimeout);
|
||||||
|
peer = TcpPeerServer.peerFromSocketAndKey(sock,
|
||||||
|
getDataEncryptionKey());
|
||||||
|
success = true;
|
||||||
|
return peer;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.cleanup(LOG, peer);
|
||||||
|
IOUtils.closeSocket(sock);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,6 +59,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||||
public static final String DFS_CLIENT_CACHE_DROP_BEHIND_WRITES = "dfs.client.cache.drop.behind.writes";
|
public static final String DFS_CLIENT_CACHE_DROP_BEHIND_WRITES = "dfs.client.cache.drop.behind.writes";
|
||||||
public static final String DFS_CLIENT_CACHE_DROP_BEHIND_READS = "dfs.client.cache.drop.behind.reads";
|
public static final String DFS_CLIENT_CACHE_DROP_BEHIND_READS = "dfs.client.cache.drop.behind.reads";
|
||||||
public static final String DFS_CLIENT_CACHE_READAHEAD = "dfs.client.cache.readahead";
|
public static final String DFS_CLIENT_CACHE_READAHEAD = "dfs.client.cache.readahead";
|
||||||
|
public static final String DFS_CLIENT_CONTEXT = "dfs.client.context";
|
||||||
|
public static final String DFS_CLIENT_CONTEXT_DEFAULT = "default";
|
||||||
public static final String DFS_HDFS_BLOCKS_METADATA_ENABLED = "dfs.datanode.hdfs-blocks-metadata.enabled";
|
public static final String DFS_HDFS_BLOCKS_METADATA_ENABLED = "dfs.datanode.hdfs-blocks-metadata.enabled";
|
||||||
public static final boolean DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT = false;
|
public static final boolean DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT = false;
|
||||||
public static final String DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS = "dfs.client.file-block-storage-locations.num-threads";
|
public static final String DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS = "dfs.client.file-block-storage-locations.num-threads";
|
||||||
|
@ -418,18 +420,20 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||||
public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false;
|
public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false;
|
||||||
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size";
|
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size";
|
||||||
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size";
|
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size";
|
||||||
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 100;
|
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 256;
|
||||||
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms";
|
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms";
|
||||||
public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 5000;
|
public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 5 * 60 * 1000;
|
||||||
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
|
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
|
||||||
public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
|
public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
|
||||||
public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
|
public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
|
||||||
public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
|
public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
|
||||||
public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 1024;
|
public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256;
|
||||||
public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";
|
public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";
|
||||||
public static final long DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT = 15 * 60 * 1000;
|
public static final long DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT = 60 * 60 * 1000;
|
||||||
public static final String DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT = "dfs.client.mmap.cache.thread.runs.per.timeout";
|
public static final String DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS = "dfs.client.mmap.retry.timeout.ms";
|
||||||
public static final int DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT = 4;
|
public static final long DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS_DEFAULT = 5 * 60 * 1000;
|
||||||
|
public static final String DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS = "dfs.client.short.circuit.replica.stale.threshold.ms";
|
||||||
|
public static final long DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS_DEFAULT = 30 * 60 * 1000;
|
||||||
|
|
||||||
// property for fsimage compression
|
// property for fsimage compression
|
||||||
public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress";
|
public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress";
|
||||||
|
|
|
@ -46,9 +46,6 @@
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
import org.apache.hadoop.hdfs.net.DomainPeer;
|
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -82,7 +79,6 @@ public class DFSInputStream extends FSInputStream
|
||||||
HasEnhancedByteBufferAccess {
|
HasEnhancedByteBufferAccess {
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static boolean tcpReadsDisabledForTesting = false;
|
static boolean tcpReadsDisabledForTesting = false;
|
||||||
private final PeerCache peerCache;
|
|
||||||
private final DFSClient dfsClient;
|
private final DFSClient dfsClient;
|
||||||
private boolean closed = false;
|
private boolean closed = false;
|
||||||
private final String src;
|
private final String src;
|
||||||
|
@ -190,8 +186,6 @@ void addZeroCopyBytes(long amt) {
|
||||||
private long totalZeroCopyBytesRead;
|
private long totalZeroCopyBytesRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final FileInputStreamCache fileInputStreamCache;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This variable tracks the number of failures since the start of the
|
* This variable tracks the number of failures since the start of the
|
||||||
* most recent user-facing operation. That is to say, it should be reset
|
* most recent user-facing operation. That is to say, it should be reset
|
||||||
|
@ -223,10 +217,6 @@ void addToDeadNodes(DatanodeInfo dnInfo) {
|
||||||
this.verifyChecksum = verifyChecksum;
|
this.verifyChecksum = verifyChecksum;
|
||||||
this.buffersize = buffersize;
|
this.buffersize = buffersize;
|
||||||
this.src = src;
|
this.src = src;
|
||||||
this.peerCache = dfsClient.peerCache;
|
|
||||||
this.fileInputStreamCache = new FileInputStreamCache(
|
|
||||||
dfsClient.getConf().shortCircuitStreamsCacheSize,
|
|
||||||
dfsClient.getConf().shortCircuitStreamsCacheExpiryMs);
|
|
||||||
this.cachingStrategy =
|
this.cachingStrategy =
|
||||||
dfsClient.getDefaultReadCachingStrategy();
|
dfsClient.getDefaultReadCachingStrategy();
|
||||||
openInfo();
|
openInfo();
|
||||||
|
@ -572,18 +562,28 @@ private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
|
||||||
try {
|
try {
|
||||||
ExtendedBlock blk = targetBlock.getBlock();
|
ExtendedBlock blk = targetBlock.getBlock();
|
||||||
Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken();
|
Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken();
|
||||||
blockReader = getBlockReader(targetAddr, chosenNode, src, blk,
|
blockReader = new BlockReaderFactory(dfsClient.getConf()).
|
||||||
accessToken, offsetIntoBlock, blk.getNumBytes() - offsetIntoBlock,
|
setInetSocketAddress(targetAddr).
|
||||||
buffersize, verifyChecksum, dfsClient.clientName, cachingStrategy);
|
setRemotePeerFactory(dfsClient).
|
||||||
|
setDatanodeInfo(chosenNode).
|
||||||
|
setFileName(src).
|
||||||
|
setBlock(blk).
|
||||||
|
setBlockToken(accessToken).
|
||||||
|
setStartOffset(offsetIntoBlock).
|
||||||
|
setVerifyChecksum(verifyChecksum).
|
||||||
|
setClientName(dfsClient.clientName).
|
||||||
|
setLength(blk.getNumBytes() - offsetIntoBlock).
|
||||||
|
setCachingStrategy(cachingStrategy).
|
||||||
|
setAllowShortCircuitLocalReads(!shortCircuitForbidden()).
|
||||||
|
setClientCacheContext(dfsClient.getClientContext()).
|
||||||
|
setUserGroupInformation(dfsClient.ugi).
|
||||||
|
setConfiguration(dfsClient.getConfiguration()).
|
||||||
|
build();
|
||||||
if(connectFailedOnce) {
|
if(connectFailedOnce) {
|
||||||
DFSClient.LOG.info("Successfully connected to " + targetAddr +
|
DFSClient.LOG.info("Successfully connected to " + targetAddr +
|
||||||
" for " + blk);
|
" for " + blk);
|
||||||
}
|
}
|
||||||
return chosenNode;
|
return chosenNode;
|
||||||
} catch (AccessControlException ex) {
|
|
||||||
DFSClient.LOG.warn("Short circuit access failed " + ex);
|
|
||||||
dfsClient.disableLegacyBlockReaderLocal();
|
|
||||||
continue;
|
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
|
if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
|
||||||
DFSClient.LOG.info("Will fetch a new encryption key and retry, "
|
DFSClient.LOG.info("Will fetch a new encryption key and retry, "
|
||||||
|
@ -635,7 +635,6 @@ public void accept(ByteBuffer k, Object v) {
|
||||||
blockReader = null;
|
blockReader = null;
|
||||||
}
|
}
|
||||||
super.close();
|
super.close();
|
||||||
fileInputStreamCache.close();
|
|
||||||
closed = true;
|
closed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -932,9 +931,11 @@ private void fetchBlockByteRange(LocatedBlock block, long start, long end,
|
||||||
// or fetchBlockAt(). Always get the latest list of locations at the
|
// or fetchBlockAt(). Always get the latest list of locations at the
|
||||||
// start of the loop.
|
// start of the loop.
|
||||||
CachingStrategy curCachingStrategy;
|
CachingStrategy curCachingStrategy;
|
||||||
|
boolean allowShortCircuitLocalReads;
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
block = getBlockAt(block.getStartOffset(), false);
|
block = getBlockAt(block.getStartOffset(), false);
|
||||||
curCachingStrategy = cachingStrategy;
|
curCachingStrategy = cachingStrategy;
|
||||||
|
allowShortCircuitLocalReads = !shortCircuitForbidden();
|
||||||
}
|
}
|
||||||
DNAddrPair retval = chooseDataNode(block);
|
DNAddrPair retval = chooseDataNode(block);
|
||||||
DatanodeInfo chosenNode = retval.info;
|
DatanodeInfo chosenNode = retval.info;
|
||||||
|
@ -943,11 +944,24 @@ private void fetchBlockByteRange(LocatedBlock block, long start, long end,
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Token<BlockTokenIdentifier> blockToken = block.getBlockToken();
|
Token<BlockTokenIdentifier> blockToken = block.getBlockToken();
|
||||||
|
|
||||||
int len = (int) (end - start + 1);
|
int len = (int) (end - start + 1);
|
||||||
reader = getBlockReader(targetAddr, chosenNode, src, block.getBlock(),
|
reader = new BlockReaderFactory(dfsClient.getConf()).
|
||||||
blockToken, start, len, buffersize, verifyChecksum,
|
setInetSocketAddress(targetAddr).
|
||||||
dfsClient.clientName, curCachingStrategy);
|
setRemotePeerFactory(dfsClient).
|
||||||
|
setDatanodeInfo(chosenNode).
|
||||||
|
setFileName(src).
|
||||||
|
setBlock(block.getBlock()).
|
||||||
|
setBlockToken(blockToken).
|
||||||
|
setStartOffset(start).
|
||||||
|
setVerifyChecksum(verifyChecksum).
|
||||||
|
setClientName(dfsClient.clientName).
|
||||||
|
setLength(len).
|
||||||
|
setCachingStrategy(curCachingStrategy).
|
||||||
|
setAllowShortCircuitLocalReads(allowShortCircuitLocalReads).
|
||||||
|
setClientCacheContext(dfsClient.getClientContext()).
|
||||||
|
setUserGroupInformation(dfsClient.ugi).
|
||||||
|
setConfiguration(dfsClient.getConfiguration()).
|
||||||
|
build();
|
||||||
int nread = reader.readAll(buf, offset, len);
|
int nread = reader.readAll(buf, offset, len);
|
||||||
if (nread != len) {
|
if (nread != len) {
|
||||||
throw new IOException("truncated return from reader.read(): " +
|
throw new IOException("truncated return from reader.read(): " +
|
||||||
|
@ -960,10 +974,6 @@ private void fetchBlockByteRange(LocatedBlock block, long start, long end,
|
||||||
e.getPos() + " from " + chosenNode);
|
e.getPos() + " from " + chosenNode);
|
||||||
// we want to remember what we have tried
|
// we want to remember what we have tried
|
||||||
addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
|
addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
|
||||||
} catch (AccessControlException ex) {
|
|
||||||
DFSClient.LOG.warn("Short circuit access failed " + ex);
|
|
||||||
dfsClient.disableLegacyBlockReaderLocal();
|
|
||||||
continue;
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
|
if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
|
||||||
DFSClient.LOG.info("Will fetch a new encryption key and retry, "
|
DFSClient.LOG.info("Will fetch a new encryption key and retry, "
|
||||||
|
@ -1022,194 +1032,6 @@ private static boolean tokenRefetchNeeded(IOException ex,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Peer newTcpPeer(InetSocketAddress addr) throws IOException {
|
|
||||||
Peer peer = null;
|
|
||||||
boolean success = false;
|
|
||||||
Socket sock = null;
|
|
||||||
try {
|
|
||||||
sock = dfsClient.socketFactory.createSocket();
|
|
||||||
NetUtils.connect(sock, addr,
|
|
||||||
dfsClient.getRandomLocalInterfaceAddr(),
|
|
||||||
dfsClient.getConf().socketTimeout);
|
|
||||||
peer = TcpPeerServer.peerFromSocketAndKey(sock,
|
|
||||||
dfsClient.getDataEncryptionKey());
|
|
||||||
success = true;
|
|
||||||
return peer;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeQuietly(peer);
|
|
||||||
IOUtils.closeQuietly(sock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieve a BlockReader suitable for reading.
|
|
||||||
* This method will reuse the cached connection to the DN if appropriate.
|
|
||||||
* Otherwise, it will create a new connection.
|
|
||||||
* Throwing an IOException from this method is basically equivalent to
|
|
||||||
* declaring the DataNode bad, so we try to connect a lot of different ways
|
|
||||||
* before doing that.
|
|
||||||
*
|
|
||||||
* @param dnAddr Address of the datanode
|
|
||||||
* @param chosenNode Chosen datanode information
|
|
||||||
* @param file File location
|
|
||||||
* @param block The Block object
|
|
||||||
* @param blockToken The access token for security
|
|
||||||
* @param startOffset The read offset, relative to block head
|
|
||||||
* @param len The number of bytes to read
|
|
||||||
* @param bufferSize The IO buffer size (not the client buffer size)
|
|
||||||
* @param verifyChecksum Whether to verify checksum
|
|
||||||
* @param clientName Client name
|
|
||||||
* @param CachingStrategy caching strategy to use
|
|
||||||
* @return New BlockReader instance
|
|
||||||
*/
|
|
||||||
protected BlockReader getBlockReader(InetSocketAddress dnAddr,
|
|
||||||
DatanodeInfo chosenNode,
|
|
||||||
String file,
|
|
||||||
ExtendedBlock block,
|
|
||||||
Token<BlockTokenIdentifier> blockToken,
|
|
||||||
long startOffset,
|
|
||||||
long len,
|
|
||||||
int bufferSize,
|
|
||||||
boolean verifyChecksum,
|
|
||||||
String clientName,
|
|
||||||
CachingStrategy curCachingStrategy)
|
|
||||||
throws IOException {
|
|
||||||
// Firstly, we check to see if we have cached any file descriptors for
|
|
||||||
// local blocks. If so, we can just re-use those file descriptors.
|
|
||||||
FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block);
|
|
||||||
if (fis != null) {
|
|
||||||
if (DFSClient.LOG.isDebugEnabled()) {
|
|
||||||
DFSClient.LOG.debug("got FileInputStreams for " + block + " from " +
|
|
||||||
"the FileInputStreamCache.");
|
|
||||||
}
|
|
||||||
return new BlockReaderLocal.Builder(dfsClient.getConf()).
|
|
||||||
setFilename(file).
|
|
||||||
setBlock(block).
|
|
||||||
setStartOffset(startOffset).
|
|
||||||
setStreams(fis).
|
|
||||||
setDatanodeID(chosenNode).
|
|
||||||
setVerifyChecksum(verifyChecksum).
|
|
||||||
setBlockMetadataHeader(BlockMetadataHeader.
|
|
||||||
preadHeader(fis[1].getChannel())).
|
|
||||||
setFileInputStreamCache(fileInputStreamCache).
|
|
||||||
setCachingStrategy(curCachingStrategy).
|
|
||||||
build();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the legacy local block reader is enabled and we are reading a local
|
|
||||||
// block, try to create a BlockReaderLocalLegacy. The legacy local block
|
|
||||||
// reader implements local reads in the style first introduced by HDFS-2246.
|
|
||||||
if ((dfsClient.useLegacyBlockReaderLocal()) &&
|
|
||||||
DFSClient.isLocalAddress(dnAddr) &&
|
|
||||||
(!shortCircuitForbidden())) {
|
|
||||||
try {
|
|
||||||
return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient,
|
|
||||||
clientName, block, blockToken, chosenNode, startOffset);
|
|
||||||
} catch (IOException e) {
|
|
||||||
DFSClient.LOG.warn("error creating legacy BlockReaderLocal. " +
|
|
||||||
"Disabling legacy local reads.", e);
|
|
||||||
dfsClient.disableLegacyBlockReaderLocal();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for cached domain peers.
|
|
||||||
int cacheTries = 0;
|
|
||||||
DomainSocketFactory dsFactory = dfsClient.getDomainSocketFactory();
|
|
||||||
BlockReader reader = null;
|
|
||||||
final int nCachedConnRetry = dfsClient.getConf().nCachedConnRetry;
|
|
||||||
for (; cacheTries < nCachedConnRetry; ++cacheTries) {
|
|
||||||
Peer peer = peerCache.get(chosenNode, true);
|
|
||||||
if (peer == null) break;
|
|
||||||
try {
|
|
||||||
boolean allowShortCircuitLocalReads = dfsClient.getConf().
|
|
||||||
shortCircuitLocalReads && (!shortCircuitForbidden());
|
|
||||||
reader = BlockReaderFactory.newBlockReader(
|
|
||||||
dfsClient.getConf(), file, block, blockToken, startOffset,
|
|
||||||
len, verifyChecksum, clientName, peer, chosenNode,
|
|
||||||
dsFactory, peerCache, fileInputStreamCache,
|
|
||||||
allowShortCircuitLocalReads, curCachingStrategy);
|
|
||||||
return reader;
|
|
||||||
} catch (IOException ex) {
|
|
||||||
DFSClient.LOG.debug("Error making BlockReader with DomainSocket. " +
|
|
||||||
"Closing stale " + peer, ex);
|
|
||||||
} finally {
|
|
||||||
if (reader == null) {
|
|
||||||
IOUtils.closeQuietly(peer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to create a DomainPeer.
|
|
||||||
DomainSocket domSock = dsFactory.create(dnAddr, this);
|
|
||||||
if (domSock != null) {
|
|
||||||
Peer peer = new DomainPeer(domSock);
|
|
||||||
try {
|
|
||||||
boolean allowShortCircuitLocalReads = dfsClient.getConf().
|
|
||||||
shortCircuitLocalReads && (!shortCircuitForbidden());
|
|
||||||
reader = BlockReaderFactory.newBlockReader(
|
|
||||||
dfsClient.getConf(), file, block, blockToken, startOffset,
|
|
||||||
len, verifyChecksum, clientName, peer, chosenNode,
|
|
||||||
dsFactory, peerCache, fileInputStreamCache,
|
|
||||||
allowShortCircuitLocalReads, curCachingStrategy);
|
|
||||||
return reader;
|
|
||||||
} catch (IOException e) {
|
|
||||||
DFSClient.LOG.warn("failed to connect to " + domSock, e);
|
|
||||||
} finally {
|
|
||||||
if (reader == null) {
|
|
||||||
// If the Peer that we got the error from was a DomainPeer,
|
|
||||||
// mark the socket path as bad, so that newDataSocket will not try
|
|
||||||
// to re-open this socket for a while.
|
|
||||||
dsFactory.disableDomainSocketPath(domSock.getPath());
|
|
||||||
IOUtils.closeQuietly(peer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for cached peers.
|
|
||||||
for (; cacheTries < nCachedConnRetry; ++cacheTries) {
|
|
||||||
Peer peer = peerCache.get(chosenNode, false);
|
|
||||||
if (peer == null) break;
|
|
||||||
try {
|
|
||||||
reader = BlockReaderFactory.newBlockReader(
|
|
||||||
dfsClient.getConf(), file, block, blockToken, startOffset,
|
|
||||||
len, verifyChecksum, clientName, peer, chosenNode,
|
|
||||||
dsFactory, peerCache, fileInputStreamCache, false,
|
|
||||||
curCachingStrategy);
|
|
||||||
return reader;
|
|
||||||
} catch (IOException ex) {
|
|
||||||
DFSClient.LOG.debug("Error making BlockReader. Closing stale " +
|
|
||||||
peer, ex);
|
|
||||||
} finally {
|
|
||||||
if (reader == null) {
|
|
||||||
IOUtils.closeQuietly(peer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (tcpReadsDisabledForTesting) {
|
|
||||||
throw new IOException("TCP reads are disabled.");
|
|
||||||
}
|
|
||||||
// Try to create a new remote peer.
|
|
||||||
Peer peer = newTcpPeer(dnAddr);
|
|
||||||
try {
|
|
||||||
reader = BlockReaderFactory.newBlockReader(dfsClient.getConf(), file,
|
|
||||||
block, blockToken, startOffset, len, verifyChecksum, clientName,
|
|
||||||
peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false,
|
|
||||||
curCachingStrategy);
|
|
||||||
return reader;
|
|
||||||
} catch (IOException ex) {
|
|
||||||
DFSClient.LOG.debug(
|
|
||||||
"Exception while getting block reader, closing stale " + peer, ex);
|
|
||||||
throw ex;
|
|
||||||
} finally {
|
|
||||||
if (reader == null) {
|
|
||||||
IOUtils.closeQuietly(peer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read bytes starting from the specified position.
|
* Read bytes starting from the specified position.
|
||||||
*
|
*
|
||||||
|
@ -1555,8 +1377,7 @@ private synchronized ByteBuffer tryReadZeroCopy(int maxLength,
|
||||||
long blockStartInFile = currentLocatedBlock.getStartOffset();
|
long blockStartInFile = currentLocatedBlock.getStartOffset();
|
||||||
long blockPos = curPos - blockStartInFile;
|
long blockPos = curPos - blockStartInFile;
|
||||||
long limit = blockPos + length;
|
long limit = blockPos + length;
|
||||||
ClientMmap clientMmap =
|
ClientMmap clientMmap = blockReader.getClientMmap(opts);
|
||||||
blockReader.getClientMmap(opts, dfsClient.getMmapManager());
|
|
||||||
if (clientMmap == null) {
|
if (clientMmap == null) {
|
||||||
if (DFSClient.LOG.isDebugEnabled()) {
|
if (DFSClient.LOG.isDebugEnabled()) {
|
||||||
DFSClient.LOG.debug("unable to perform a zero-copy read from offset " +
|
DFSClient.LOG.debug("unable to perform a zero-copy read from offset " +
|
||||||
|
@ -1565,17 +1386,25 @@ private synchronized ByteBuffer tryReadZeroCopy(int maxLength,
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
seek(pos + length);
|
boolean success = false;
|
||||||
ByteBuffer buffer = clientMmap.getMappedByteBuffer().asReadOnlyBuffer();
|
ByteBuffer buffer;
|
||||||
buffer.position((int)blockPos);
|
try {
|
||||||
buffer.limit((int)limit);
|
seek(pos + length);
|
||||||
clientMmap.ref();
|
buffer = clientMmap.getMappedByteBuffer().asReadOnlyBuffer();
|
||||||
extendedReadBuffers.put(buffer, clientMmap);
|
buffer.position((int)blockPos);
|
||||||
readStatistics.addZeroCopyBytes(length);
|
buffer.limit((int)limit);
|
||||||
if (DFSClient.LOG.isDebugEnabled()) {
|
extendedReadBuffers.put(buffer, clientMmap);
|
||||||
DFSClient.LOG.debug("readZeroCopy read " + maxLength + " bytes from " +
|
readStatistics.addZeroCopyBytes(length);
|
||||||
"offset " + curPos + " via the zero-copy read path. " +
|
if (DFSClient.LOG.isDebugEnabled()) {
|
||||||
"blockEnd = " + blockEnd);
|
DFSClient.LOG.debug("readZeroCopy read " + maxLength + " bytes from " +
|
||||||
|
"offset " + curPos + " via the zero-copy read path. " +
|
||||||
|
"blockEnd = " + blockEnd);
|
||||||
|
}
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
clientMmap.unref();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,29 +27,71 @@
|
||||||
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
||||||
import org.apache.hadoop.net.unix.DomainSocket;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.cache.Cache;
|
import com.google.common.cache.Cache;
|
||||||
import com.google.common.cache.CacheBuilder;
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
|
||||||
class DomainSocketFactory {
|
class DomainSocketFactory {
|
||||||
private static final Log LOG = BlockReaderLocal.LOG;
|
private static final Log LOG = BlockReaderLocal.LOG;
|
||||||
private final Conf conf;
|
|
||||||
|
|
||||||
enum PathStatus {
|
public enum PathState {
|
||||||
UNUSABLE,
|
UNUSABLE(false, false),
|
||||||
SHORT_CIRCUIT_DISABLED,
|
SHORT_CIRCUIT_DISABLED(true, false),
|
||||||
|
VALID(true, true);
|
||||||
|
|
||||||
|
PathState(boolean usableForDataTransfer, boolean usableForShortCircuit) {
|
||||||
|
this.usableForDataTransfer = usableForDataTransfer;
|
||||||
|
this.usableForShortCircuit = usableForShortCircuit;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getUsableForDataTransfer() {
|
||||||
|
return usableForDataTransfer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getUsableForShortCircuit() {
|
||||||
|
return usableForShortCircuit;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final boolean usableForDataTransfer;
|
||||||
|
private final boolean usableForShortCircuit;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class PathInfo {
|
||||||
|
private final static PathInfo NOT_CONFIGURED =
|
||||||
|
new PathInfo("", PathState.UNUSABLE);
|
||||||
|
|
||||||
|
final private String path;
|
||||||
|
final private PathState state;
|
||||||
|
|
||||||
|
PathInfo(String path, PathState state) {
|
||||||
|
this.path = path;
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PathState getPathState() {
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new StringBuilder().append("PathInfo{path=").append(path).
|
||||||
|
append(", state=").append(state).append("}").toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Information about domain socket paths.
|
* Information about domain socket paths.
|
||||||
*/
|
*/
|
||||||
Cache<String, PathStatus> pathInfo =
|
Cache<String, PathState> pathMap =
|
||||||
CacheBuilder.newBuilder()
|
CacheBuilder.newBuilder()
|
||||||
.expireAfterWrite(10, TimeUnit.MINUTES)
|
.expireAfterWrite(10, TimeUnit.MINUTES)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
public DomainSocketFactory(Conf conf) {
|
public DomainSocketFactory(Conf conf) {
|
||||||
this.conf = conf;
|
|
||||||
|
|
||||||
final String feature;
|
final String feature;
|
||||||
if (conf.shortCircuitLocalReads && (!conf.useLegacyBlockReaderLocal)) {
|
if (conf.shortCircuitLocalReads && (!conf.useLegacyBlockReaderLocal)) {
|
||||||
feature = "The short-circuit local reads feature";
|
feature = "The short-circuit local reads feature";
|
||||||
|
@ -75,51 +117,46 @@ public DomainSocketFactory(Conf conf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a DomainSocket.
|
* Get information about a domain socket path.
|
||||||
*
|
*
|
||||||
* @param addr The address of the DataNode
|
* @param addr The inet address to use.
|
||||||
* @param stream The DFSInputStream the socket will be created for.
|
* @param conf The client configuration.
|
||||||
*
|
*
|
||||||
* @return null if the socket could not be created; the
|
* @return Information about the socket path.
|
||||||
* socket otherwise. If there was an error while
|
|
||||||
* creating the socket, we will add the socket path
|
|
||||||
* to our list of failed domain socket paths.
|
|
||||||
*/
|
*/
|
||||||
DomainSocket create(InetSocketAddress addr, DFSInputStream stream) {
|
public PathInfo getPathInfo(InetSocketAddress addr, DFSClient.Conf conf) {
|
||||||
// If there is no domain socket path configured, we can't use domain
|
// If there is no domain socket path configured, we can't use domain
|
||||||
// sockets.
|
// sockets.
|
||||||
if (conf.domainSocketPath.isEmpty()) return null;
|
if (conf.domainSocketPath.isEmpty()) return PathInfo.NOT_CONFIGURED;
|
||||||
// If we can't do anything with the domain socket, don't create it.
|
// If we can't do anything with the domain socket, don't create it.
|
||||||
if (!conf.domainSocketDataTraffic &&
|
if (!conf.domainSocketDataTraffic &&
|
||||||
(!conf.shortCircuitLocalReads || conf.useLegacyBlockReaderLocal)) {
|
(!conf.shortCircuitLocalReads || conf.useLegacyBlockReaderLocal)) {
|
||||||
return null;
|
return PathInfo.NOT_CONFIGURED;
|
||||||
}
|
}
|
||||||
// UNIX domain sockets can only be used to talk to local peers
|
|
||||||
if (!DFSClient.isLocalAddress(addr)) return null;
|
|
||||||
// If the DomainSocket code is not loaded, we can't create
|
// If the DomainSocket code is not loaded, we can't create
|
||||||
// DomainSocket objects.
|
// DomainSocket objects.
|
||||||
if (DomainSocket.getLoadingFailureReason() != null) return null;
|
if (DomainSocket.getLoadingFailureReason() != null) {
|
||||||
|
return PathInfo.NOT_CONFIGURED;
|
||||||
|
}
|
||||||
|
// UNIX domain sockets can only be used to talk to local peers
|
||||||
|
if (!DFSClient.isLocalAddress(addr)) return PathInfo.NOT_CONFIGURED;
|
||||||
String escapedPath = DomainSocket.
|
String escapedPath = DomainSocket.
|
||||||
getEffectivePath(conf.domainSocketPath, addr.getPort());
|
getEffectivePath(conf.domainSocketPath, addr.getPort());
|
||||||
PathStatus info = pathInfo.getIfPresent(escapedPath);
|
PathState status = pathMap.getIfPresent(escapedPath);
|
||||||
if (info == PathStatus.UNUSABLE) {
|
if (status == null) {
|
||||||
// We tried to connect to this domain socket before, and it was totally
|
return new PathInfo(escapedPath, PathState.VALID);
|
||||||
// unusable.
|
} else {
|
||||||
return null;
|
return new PathInfo(escapedPath, status);
|
||||||
}
|
|
||||||
if ((!conf.domainSocketDataTraffic) &&
|
|
||||||
((info == PathStatus.SHORT_CIRCUIT_DISABLED) ||
|
|
||||||
stream.shortCircuitForbidden())) {
|
|
||||||
// If we don't want to pass data over domain sockets, and we don't want
|
|
||||||
// to pass file descriptors over them either, we have no use for domain
|
|
||||||
// sockets.
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public DomainSocket createSocket(PathInfo info, int socketTimeout) {
|
||||||
|
Preconditions.checkArgument(info.getPathState() != PathState.UNUSABLE);
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
DomainSocket sock = null;
|
DomainSocket sock = null;
|
||||||
try {
|
try {
|
||||||
sock = DomainSocket.connect(escapedPath);
|
sock = DomainSocket.connect(info.getPath());
|
||||||
sock.setAttribute(DomainSocket.RECEIVE_TIMEOUT, conf.socketTimeout);
|
sock.setAttribute(DomainSocket.RECEIVE_TIMEOUT, socketTimeout);
|
||||||
success = true;
|
success = true;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("error creating DomainSocket", e);
|
LOG.warn("error creating DomainSocket", e);
|
||||||
|
@ -129,7 +166,7 @@ DomainSocket create(InetSocketAddress addr, DFSInputStream stream) {
|
||||||
if (sock != null) {
|
if (sock != null) {
|
||||||
IOUtils.closeQuietly(sock);
|
IOUtils.closeQuietly(sock);
|
||||||
}
|
}
|
||||||
pathInfo.put(escapedPath, PathStatus.UNUSABLE);
|
pathMap.put(info.getPath(), PathState.UNUSABLE);
|
||||||
sock = null;
|
sock = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,10 +174,10 @@ DomainSocket create(InetSocketAddress addr, DFSInputStream stream) {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void disableShortCircuitForPath(String path) {
|
public void disableShortCircuitForPath(String path) {
|
||||||
pathInfo.put(path, PathStatus.SHORT_CIRCUIT_DISABLED);
|
pathMap.put(path, PathState.SHORT_CIRCUIT_DISABLED);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void disableDomainSocketPath(String path) {
|
public void disableDomainSocketPath(String path) {
|
||||||
pathInfo.put(path, PathStatus.UNUSABLE);
|
pathMap.put(path, PathState.UNUSABLE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.builder.EqualsBuilder;
|
||||||
|
import org.apache.commons.lang.builder.HashCodeBuilder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An immutable key which identifies a block.
|
||||||
|
*/
|
||||||
|
final public class ExtendedBlockId {
|
||||||
|
/**
|
||||||
|
* The block ID for this block.
|
||||||
|
*/
|
||||||
|
private final long blockId;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The block pool ID for this block.
|
||||||
|
*/
|
||||||
|
private final String bpId;
|
||||||
|
|
||||||
|
public ExtendedBlockId(long blockId, String bpId) {
|
||||||
|
this.blockId = blockId;
|
||||||
|
this.bpId = bpId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getBlockId() {
|
||||||
|
return this.blockId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBlockPoolId() {
|
||||||
|
return this.bpId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if ((o == null) || (o.getClass() != this.getClass())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ExtendedBlockId other = (ExtendedBlockId)o;
|
||||||
|
return new EqualsBuilder().
|
||||||
|
append(blockId, other.blockId).
|
||||||
|
append(bpId, other.bpId).
|
||||||
|
isEquals();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return new HashCodeBuilder().
|
||||||
|
append(this.blockId).
|
||||||
|
append(this.bpId).
|
||||||
|
toHashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new StringBuilder().append(blockId).
|
||||||
|
append("_").append(bpId).toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,287 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs;
|
|
||||||
|
|
||||||
import java.io.Closeable;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.ref.WeakReference;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.concurrent.ScheduledFuture;
|
|
||||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
|
|
||||||
import com.google.common.collect.LinkedListMultimap;
|
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* FileInputStream cache is used to cache FileInputStream objects that we
|
|
||||||
* have received from the DataNode.
|
|
||||||
*/
|
|
||||||
class FileInputStreamCache {
|
|
||||||
private final static Log LOG = LogFactory.getLog(FileInputStreamCache.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The executor service that runs the cacheCleaner. There is only one of
|
|
||||||
* these per VM.
|
|
||||||
*/
|
|
||||||
private final static ScheduledThreadPoolExecutor executor
|
|
||||||
= new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
|
|
||||||
setDaemon(true).setNameFormat("FileInputStreamCache Cleaner").
|
|
||||||
build());
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The CacheCleaner for this FileInputStreamCache. We don't create this
|
|
||||||
* and schedule it until it becomes necessary.
|
|
||||||
*/
|
|
||||||
private CacheCleaner cacheCleaner;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Maximum number of entries to allow in the cache.
|
|
||||||
*/
|
|
||||||
private final int maxCacheSize;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The minimum time in milliseconds to preserve an element in the cache.
|
|
||||||
*/
|
|
||||||
private final long expiryTimeMs;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* True if the FileInputStreamCache is closed.
|
|
||||||
*/
|
|
||||||
private boolean closed = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Cache entries.
|
|
||||||
*/
|
|
||||||
private final LinkedListMultimap<Key, Value> map = LinkedListMultimap.create();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Expiry thread which makes sure that the file descriptors get closed
|
|
||||||
* after a while.
|
|
||||||
*/
|
|
||||||
private static class CacheCleaner implements Runnable, Closeable {
|
|
||||||
private WeakReference<FileInputStreamCache> cacheRef;
|
|
||||||
private ScheduledFuture<?> future;
|
|
||||||
|
|
||||||
CacheCleaner(FileInputStreamCache cache) {
|
|
||||||
this.cacheRef = new WeakReference<FileInputStreamCache>(cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
FileInputStreamCache cache = cacheRef.get();
|
|
||||||
if (cache == null) return;
|
|
||||||
synchronized(cache) {
|
|
||||||
if (cache.closed) return;
|
|
||||||
long curTime = Time.monotonicNow();
|
|
||||||
for (Iterator<Entry<Key, Value>> iter =
|
|
||||||
cache.map.entries().iterator(); iter.hasNext();
|
|
||||||
iter = cache.map.entries().iterator()) {
|
|
||||||
Entry<Key, Value> entry = iter.next();
|
|
||||||
if (entry.getValue().getTime() + cache.expiryTimeMs >= curTime) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
entry.getValue().close();
|
|
||||||
iter.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
if (future != null) {
|
|
||||||
future.cancel(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFuture(ScheduledFuture<?> future) {
|
|
||||||
this.future = future;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The key identifying a FileInputStream array.
|
|
||||||
*/
|
|
||||||
static class Key {
|
|
||||||
private final DatanodeID datanodeID;
|
|
||||||
private final ExtendedBlock block;
|
|
||||||
|
|
||||||
public Key(DatanodeID datanodeID, ExtendedBlock block) {
|
|
||||||
this.datanodeID = datanodeID;
|
|
||||||
this.block = block;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object other) {
|
|
||||||
if (!(other instanceof FileInputStreamCache.Key)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
FileInputStreamCache.Key otherKey = (FileInputStreamCache.Key)other;
|
|
||||||
return (block.equals(otherKey.block) &&
|
|
||||||
(block.getGenerationStamp() == otherKey.block.getGenerationStamp()) &&
|
|
||||||
datanodeID.equals(otherKey.datanodeID));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return block.hashCode();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The value containing a FileInputStream array and the time it was added to
|
|
||||||
* the cache.
|
|
||||||
*/
|
|
||||||
static class Value {
|
|
||||||
private final FileInputStream fis[];
|
|
||||||
private final long time;
|
|
||||||
|
|
||||||
public Value (FileInputStream fis[]) {
|
|
||||||
this.fis = fis;
|
|
||||||
this.time = Time.monotonicNow();
|
|
||||||
}
|
|
||||||
|
|
||||||
public FileInputStream[] getFileInputStreams() {
|
|
||||||
return fis;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getTime() {
|
|
||||||
return time;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
IOUtils.cleanup(LOG, fis);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new FileInputStream
|
|
||||||
*
|
|
||||||
* @param maxCacheSize The maximum number of elements to allow in
|
|
||||||
* the cache.
|
|
||||||
* @param expiryTimeMs The minimum time in milliseconds to preserve
|
|
||||||
* elements in the cache.
|
|
||||||
*/
|
|
||||||
public FileInputStreamCache(int maxCacheSize, long expiryTimeMs) {
|
|
||||||
this.maxCacheSize = maxCacheSize;
|
|
||||||
this.expiryTimeMs = expiryTimeMs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Put an array of FileInputStream objects into the cache.
|
|
||||||
*
|
|
||||||
* @param datanodeID The DatanodeID to store the streams under.
|
|
||||||
* @param block The Block to store the streams under.
|
|
||||||
* @param fis The streams.
|
|
||||||
*/
|
|
||||||
public void put(DatanodeID datanodeID, ExtendedBlock block,
|
|
||||||
FileInputStream fis[]) {
|
|
||||||
boolean inserted = false;
|
|
||||||
try {
|
|
||||||
synchronized(this) {
|
|
||||||
if (closed) return;
|
|
||||||
if (map.size() + 1 > maxCacheSize) {
|
|
||||||
Iterator<Entry<Key, Value>> iter = map.entries().iterator();
|
|
||||||
if (!iter.hasNext()) return;
|
|
||||||
Entry<Key, Value> entry = iter.next();
|
|
||||||
entry.getValue().close();
|
|
||||||
iter.remove();
|
|
||||||
}
|
|
||||||
if (cacheCleaner == null) {
|
|
||||||
cacheCleaner = new CacheCleaner(this);
|
|
||||||
ScheduledFuture<?> future =
|
|
||||||
executor.scheduleAtFixedRate(cacheCleaner, expiryTimeMs, expiryTimeMs,
|
|
||||||
TimeUnit.MILLISECONDS);
|
|
||||||
cacheCleaner.setFuture(future);
|
|
||||||
}
|
|
||||||
map.put(new Key(datanodeID, block), new Value(fis));
|
|
||||||
inserted = true;
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
if (!inserted) {
|
|
||||||
IOUtils.cleanup(LOG, fis);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Find and remove an array of FileInputStream objects from the cache.
|
|
||||||
*
|
|
||||||
* @param datanodeID The DatanodeID to search for.
|
|
||||||
* @param block The Block to search for.
|
|
||||||
*
|
|
||||||
* @return null if no streams can be found; the
|
|
||||||
* array otherwise. If this is non-null, the
|
|
||||||
* array will have been removed from the cache.
|
|
||||||
*/
|
|
||||||
public synchronized FileInputStream[] get(DatanodeID datanodeID,
|
|
||||||
ExtendedBlock block) {
|
|
||||||
Key key = new Key(datanodeID, block);
|
|
||||||
List<Value> ret = map.get(key);
|
|
||||||
if (ret.isEmpty()) return null;
|
|
||||||
Value val = ret.get(0);
|
|
||||||
map.remove(key, val);
|
|
||||||
return val.getFileInputStreams();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Close the cache and free all associated resources.
|
|
||||||
*/
|
|
||||||
public synchronized void close() {
|
|
||||||
if (closed) return;
|
|
||||||
closed = true;
|
|
||||||
IOUtils.cleanup(LOG, cacheCleaner);
|
|
||||||
for (Iterator<Entry<Key, Value>> iter = map.entries().iterator();
|
|
||||||
iter.hasNext();) {
|
|
||||||
Entry<Key, Value> entry = iter.next();
|
|
||||||
entry.getValue().close();
|
|
||||||
iter.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized String toString() {
|
|
||||||
StringBuilder bld = new StringBuilder();
|
|
||||||
bld.append("FileInputStreamCache(");
|
|
||||||
String prefix = "";
|
|
||||||
for (Entry<Key, Value> entry : map.entries()) {
|
|
||||||
bld.append(prefix);
|
|
||||||
bld.append(entry.getKey());
|
|
||||||
prefix = ", ";
|
|
||||||
}
|
|
||||||
bld.append(")");
|
|
||||||
return bld.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getExpiryTimeMs() {
|
|
||||||
return expiryTimeMs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getMaxCacheSize() {
|
|
||||||
return maxCacheSize;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -89,42 +89,19 @@ long getTime() {
|
||||||
LinkedListMultimap.create();
|
LinkedListMultimap.create();
|
||||||
private final int capacity;
|
private final int capacity;
|
||||||
private final long expiryPeriod;
|
private final long expiryPeriod;
|
||||||
private static PeerCache instance = null;
|
|
||||||
|
|
||||||
@VisibleForTesting
|
public PeerCache(int c, long e) {
|
||||||
PeerCache(int c, long e) {
|
|
||||||
this.capacity = c;
|
this.capacity = c;
|
||||||
this.expiryPeriod = e;
|
this.expiryPeriod = e;
|
||||||
|
|
||||||
if (capacity == 0 ) {
|
if (capacity == 0 ) {
|
||||||
LOG.info("SocketCache disabled.");
|
LOG.info("SocketCache disabled.");
|
||||||
}
|
} else if (expiryPeriod == 0) {
|
||||||
else if (expiryPeriod == 0) {
|
|
||||||
throw new IllegalStateException("Cannot initialize expiryPeriod to " +
|
throw new IllegalStateException("Cannot initialize expiryPeriod to " +
|
||||||
expiryPeriod + "when cache is enabled.");
|
expiryPeriod + " when cache is enabled.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static synchronized PeerCache getInstance(int c, long e) {
|
|
||||||
// capacity is only initialized once
|
|
||||||
if (instance == null) {
|
|
||||||
instance = new PeerCache(c, e);
|
|
||||||
} else { //already initialized once
|
|
||||||
if (instance.capacity != c || instance.expiryPeriod != e) {
|
|
||||||
LOG.info("capacity and expiry periods already set to " +
|
|
||||||
instance.capacity + " and " + instance.expiryPeriod +
|
|
||||||
" respectively. Cannot set it to " + c + " and " + e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return instance;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
public static synchronized void setInstance(int c, long e) {
|
|
||||||
instance = new PeerCache(c, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isDaemonStarted() {
|
private boolean isDaemonStarted() {
|
||||||
return (daemon == null)? false: true;
|
return (daemon == null)? false: true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,6 @@
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -492,8 +491,7 @@ public boolean isShortCircuit() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
public ClientMmap getClientMmap(EnumSet<ReadOption> opts) {
|
||||||
ClientMmapManager mmapManager) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,6 @@
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.fs.ReadOption;
|
import org.apache.hadoop.fs.ReadOption;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
import org.apache.hadoop.hdfs.client.ClientMmap;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -457,8 +456,7 @@ public boolean isShortCircuit() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ClientMmap getClientMmap(EnumSet<ReadOption> opts,
|
public ClientMmap getClientMmap(EnumSet<ReadOption> opts) {
|
||||||
ClientMmapManager mmapManager) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
|
||||||
|
public interface RemotePeerFactory {
|
||||||
|
/**
|
||||||
|
* @param addr The address to connect to.
|
||||||
|
*
|
||||||
|
* @return A new Peer connected to the address.
|
||||||
|
*
|
||||||
|
* @throws IOException If there was an error connecting or creating
|
||||||
|
* the remote socket, encrypted stream, etc.
|
||||||
|
*/
|
||||||
|
Peer newConnectedPeer(InetSocketAddress addr) throws IOException;
|
||||||
|
}
|
|
@ -17,24 +17,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.client;
|
package org.apache.hadoop.hdfs.client;
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.ref.WeakReference;
|
|
||||||
import java.nio.MappedByteBuffer;
|
import java.nio.MappedByteBuffer;
|
||||||
import java.nio.channels.FileChannel.MapMode;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A memory-mapped region used by an HDFS client.
|
* A memory-mapped region used by an HDFS client.
|
||||||
*
|
*
|
||||||
|
@ -46,111 +36,46 @@ public class ClientMmap {
|
||||||
static final Log LOG = LogFactory.getLog(ClientMmap.class);
|
static final Log LOG = LogFactory.getLog(ClientMmap.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A reference to the manager of this mmap.
|
* A reference to the block replica which this mmap relates to.
|
||||||
*
|
|
||||||
* This is only a weak reference to help minimize the damange done by
|
|
||||||
* code which leaks references accidentally.
|
|
||||||
*/
|
*/
|
||||||
private final WeakReference<ClientMmapManager> manager;
|
private final ShortCircuitReplica replica;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The actual mapped memory region.
|
* The java ByteBuffer object.
|
||||||
*/
|
*/
|
||||||
private final MappedByteBuffer map;
|
private final MappedByteBuffer map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A reference count tracking how many threads are using this object.
|
* Reference count of this ClientMmap object.
|
||||||
*/
|
*/
|
||||||
private final AtomicInteger refCount = new AtomicInteger(1);
|
private final AtomicInteger refCount = new AtomicInteger(1);
|
||||||
|
|
||||||
/**
|
ClientMmap(ShortCircuitReplica replica, MappedByteBuffer map) {
|
||||||
* Block pertaining to this mmap
|
this.replica = replica;
|
||||||
*/
|
|
||||||
private final ExtendedBlock block;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The DataNode where this mmap came from.
|
|
||||||
*/
|
|
||||||
private final DatanodeID datanodeID;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The monotonic time when this mmap was last evictable.
|
|
||||||
*/
|
|
||||||
private long lastEvictableTimeNs;
|
|
||||||
|
|
||||||
public static ClientMmap load(ClientMmapManager manager, FileInputStream in,
|
|
||||||
ExtendedBlock block, DatanodeID datanodeID)
|
|
||||||
throws IOException {
|
|
||||||
MappedByteBuffer map =
|
|
||||||
in.getChannel().map(MapMode.READ_ONLY, 0,
|
|
||||||
in.getChannel().size());
|
|
||||||
return new ClientMmap(manager, map, block, datanodeID);
|
|
||||||
}
|
|
||||||
|
|
||||||
private ClientMmap(ClientMmapManager manager, MappedByteBuffer map,
|
|
||||||
ExtendedBlock block, DatanodeID datanodeID)
|
|
||||||
throws IOException {
|
|
||||||
this.manager = new WeakReference<ClientMmapManager>(manager);
|
|
||||||
this.map = map;
|
this.map = map;
|
||||||
this.block = block;
|
|
||||||
this.datanodeID = datanodeID;
|
|
||||||
this.lastEvictableTimeNs = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decrement the reference count on this object.
|
* Increment the reference count.
|
||||||
* Should be called with the ClientMmapManager lock held.
|
*
|
||||||
|
* @return The new reference count.
|
||||||
|
*/
|
||||||
|
void ref() {
|
||||||
|
refCount.addAndGet(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decrement the reference count.
|
||||||
|
*
|
||||||
|
* The parent replica gets unreferenced each time the reference count
|
||||||
|
* of this object goes to 0.
|
||||||
*/
|
*/
|
||||||
public void unref() {
|
public void unref() {
|
||||||
int count = refCount.decrementAndGet();
|
refCount.addAndGet(-1);
|
||||||
if (count < 0) {
|
replica.unref();
|
||||||
throw new IllegalArgumentException("can't decrement the " +
|
|
||||||
"reference count on this ClientMmap lower than 0.");
|
|
||||||
} else if (count == 0) {
|
|
||||||
ClientMmapManager man = manager.get();
|
|
||||||
if (man == null) {
|
|
||||||
unmap();
|
|
||||||
} else {
|
|
||||||
man.makeEvictable(this);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Increment the reference count on this object.
|
|
||||||
*
|
|
||||||
* @return The new reference count.
|
|
||||||
*/
|
|
||||||
public int ref() {
|
|
||||||
return refCount.getAndIncrement();
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
public ExtendedBlock getBlock() {
|
|
||||||
return block;
|
|
||||||
}
|
|
||||||
|
|
||||||
DatanodeID getDatanodeID() {
|
|
||||||
return datanodeID;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public MappedByteBuffer getMappedByteBuffer() {
|
public MappedByteBuffer getMappedByteBuffer() {
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLastEvictableTimeNs(long lastEvictableTimeNs) {
|
|
||||||
this.lastEvictableTimeNs = lastEvictableTimeNs;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getLastEvictableTimeNs() {
|
|
||||||
return this.lastEvictableTimeNs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Unmap the memory region.
|
|
||||||
*/
|
|
||||||
void unmap() {
|
|
||||||
assert(refCount.get() == 0);
|
|
||||||
NativeIO.POSIX.munmap(map);
|
|
||||||
}
|
|
||||||
}
|
}
|
|
@ -1,482 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs.client;
|
|
||||||
|
|
||||||
import java.io.Closeable;
|
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.ref.WeakReference;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.concurrent.ScheduledFuture;
|
|
||||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.locks.Condition;
|
|
||||||
import java.util.concurrent.locks.Lock;
|
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.collect.ComparisonChain;
|
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tracks mmap instances used on an HDFS client.
|
|
||||||
*
|
|
||||||
* mmaps can be used concurrently by multiple threads at once.
|
|
||||||
* mmaps cannot be closed while they are in use.
|
|
||||||
*
|
|
||||||
* The cache is important for performance, because the first time an mmap is
|
|
||||||
* created, the page table entries (PTEs) are not yet set up.
|
|
||||||
* Even when reading data that is entirely resident in memory, reading an
|
|
||||||
* mmap the second time is faster.
|
|
||||||
*/
|
|
||||||
@InterfaceAudience.Private
|
|
||||||
public class ClientMmapManager implements Closeable {
|
|
||||||
public static final Log LOG = LogFactory.getLog(ClientMmapManager.class);
|
|
||||||
|
|
||||||
private boolean closed = false;
|
|
||||||
|
|
||||||
private final int cacheSize;
|
|
||||||
|
|
||||||
private final long timeoutNs;
|
|
||||||
|
|
||||||
private final int runsPerTimeout;
|
|
||||||
|
|
||||||
private final Lock lock = new ReentrantLock();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Maps block, datanode_id to the client mmap object.
|
|
||||||
* If the ClientMmap is in the process of being loaded,
|
|
||||||
* {@link Waitable<ClientMmap>#await()} will block.
|
|
||||||
*
|
|
||||||
* Protected by the ClientMmapManager lock.
|
|
||||||
*/
|
|
||||||
private final TreeMap<Key, Waitable<ClientMmap>> mmaps =
|
|
||||||
new TreeMap<Key, Waitable<ClientMmap>>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Maps the last use time to the client mmap object.
|
|
||||||
* We ensure that each last use time is unique by inserting a jitter of a
|
|
||||||
* nanosecond or two if necessary.
|
|
||||||
*
|
|
||||||
* Protected by the ClientMmapManager lock.
|
|
||||||
* ClientMmap objects that are in use are never evictable.
|
|
||||||
*/
|
|
||||||
private final TreeMap<Long, ClientMmap> evictable =
|
|
||||||
new TreeMap<Long, ClientMmap>();
|
|
||||||
|
|
||||||
private final ScheduledThreadPoolExecutor executor =
|
|
||||||
new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
|
|
||||||
setDaemon(true).setNameFormat("ClientMmapManager").
|
|
||||||
build());
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The CacheCleaner for this ClientMmapManager. We don't create this
|
|
||||||
* and schedule it until it becomes necessary.
|
|
||||||
*/
|
|
||||||
private CacheCleaner cacheCleaner;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Factory method to create a ClientMmapManager from a Hadoop
|
|
||||||
* configuration.
|
|
||||||
*/
|
|
||||||
public static ClientMmapManager fromConf(Configuration conf) {
|
|
||||||
return new ClientMmapManager(conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT),
|
|
||||||
conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT),
|
|
||||||
conf.getInt(DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT));
|
|
||||||
}
|
|
||||||
|
|
||||||
public ClientMmapManager(int cacheSize, long timeoutMs, int runsPerTimeout) {
|
|
||||||
this.cacheSize = cacheSize;
|
|
||||||
this.timeoutNs = timeoutMs * 1000000;
|
|
||||||
this.runsPerTimeout = runsPerTimeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
long getTimeoutMs() {
|
|
||||||
return this.timeoutNs / 1000000;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getRunsPerTimeout() {
|
|
||||||
return this.runsPerTimeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String verifyConfigurationMatches(Configuration conf) {
|
|
||||||
StringBuilder bld = new StringBuilder();
|
|
||||||
int cacheSize = conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
|
|
||||||
if (this.cacheSize != cacheSize) {
|
|
||||||
bld.append("You specified a cache size of ").append(cacheSize).
|
|
||||||
append(", but the existing cache size is ").append(this.cacheSize).
|
|
||||||
append(". ");
|
|
||||||
}
|
|
||||||
long timeoutMs = conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
|
|
||||||
if (getTimeoutMs() != timeoutMs) {
|
|
||||||
bld.append("You specified a cache timeout of ").append(timeoutMs).
|
|
||||||
append(" ms, but the existing cache timeout is ").
|
|
||||||
append(getTimeoutMs()).append("ms").append(". ");
|
|
||||||
}
|
|
||||||
int runsPerTimeout = conf.getInt(
|
|
||||||
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
|
|
||||||
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT);
|
|
||||||
if (getRunsPerTimeout() != runsPerTimeout) {
|
|
||||||
bld.append("You specified ").append(runsPerTimeout).
|
|
||||||
append(" runs per timeout, but the existing runs per timeout is ").
|
|
||||||
append(getTimeoutMs()).append(". ");
|
|
||||||
}
|
|
||||||
return bld.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class Waitable<T> {
|
|
||||||
private T val;
|
|
||||||
private final Condition cond;
|
|
||||||
|
|
||||||
public Waitable(Condition cond) {
|
|
||||||
this.val = null;
|
|
||||||
this.cond = cond;
|
|
||||||
}
|
|
||||||
|
|
||||||
public T await() throws InterruptedException {
|
|
||||||
while (this.val == null) {
|
|
||||||
this.cond.await();
|
|
||||||
}
|
|
||||||
return this.val;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void provide(T val) {
|
|
||||||
this.val = val;
|
|
||||||
this.cond.signalAll();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class Key implements Comparable<Key> {
|
|
||||||
private final ExtendedBlock block;
|
|
||||||
private final DatanodeID datanode;
|
|
||||||
|
|
||||||
Key(ExtendedBlock block, DatanodeID datanode) {
|
|
||||||
this.block = block;
|
|
||||||
this.datanode = datanode;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compare two ClientMmap regions that we're storing.
|
|
||||||
*
|
|
||||||
* When we append to a block, we bump the genstamp. It is important to
|
|
||||||
* compare the genStamp here. That way, we will not return a shorter
|
|
||||||
* mmap than required.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int compareTo(Key o) {
|
|
||||||
return ComparisonChain.start().
|
|
||||||
compare(block.getBlockId(), o.block.getBlockId()).
|
|
||||||
compare(block.getGenerationStamp(), o.block.getGenerationStamp()).
|
|
||||||
compare(block.getBlockPoolId(), o.block.getBlockPoolId()).
|
|
||||||
compare(datanode, o.datanode).
|
|
||||||
result();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object rhs) {
|
|
||||||
if (rhs == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
Key o = (Key)rhs;
|
|
||||||
return (compareTo(o) == 0);
|
|
||||||
} catch (ClassCastException e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return block.hashCode() ^ datanode.hashCode();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Thread which handles expiring mmaps from the cache.
|
|
||||||
*/
|
|
||||||
private static class CacheCleaner implements Runnable, Closeable {
|
|
||||||
private WeakReference<ClientMmapManager> managerRef;
|
|
||||||
private ScheduledFuture<?> future;
|
|
||||||
|
|
||||||
CacheCleaner(ClientMmapManager manager) {
|
|
||||||
this.managerRef= new WeakReference<ClientMmapManager>(manager);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
ClientMmapManager manager = managerRef.get();
|
|
||||||
if (manager == null) return;
|
|
||||||
long curTime = System.nanoTime();
|
|
||||||
try {
|
|
||||||
manager.lock.lock();
|
|
||||||
manager.evictStaleEntries(curTime);
|
|
||||||
} finally {
|
|
||||||
manager.lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void setFuture(ScheduledFuture<?> future) {
|
|
||||||
this.future = future;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
future.cancel(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Evict entries which are older than curTime + timeoutNs from the cache.
|
|
||||||
*
|
|
||||||
* NOTE: you must call this function with the lock held.
|
|
||||||
*/
|
|
||||||
private void evictStaleEntries(long curTime) {
|
|
||||||
if (closed) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Iterator<Entry<Long, ClientMmap>> iter =
|
|
||||||
evictable.entrySet().iterator();
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
Entry<Long, ClientMmap> entry = iter.next();
|
|
||||||
if (entry.getKey() + timeoutNs >= curTime) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ClientMmap mmap = entry.getValue();
|
|
||||||
Key key = new Key(mmap.getBlock(), mmap.getDatanodeID());
|
|
||||||
mmaps.remove(key);
|
|
||||||
iter.remove();
|
|
||||||
mmap.unmap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Evict one mmap object from the cache.
|
|
||||||
*
|
|
||||||
* NOTE: you must call this function with the lock held.
|
|
||||||
*
|
|
||||||
* @return True if an object was evicted; false if none
|
|
||||||
* could be evicted.
|
|
||||||
*/
|
|
||||||
private boolean evictOne() {
|
|
||||||
Entry<Long, ClientMmap> entry = evictable.pollFirstEntry();
|
|
||||||
if (entry == null) {
|
|
||||||
// We don't want to try creating another mmap region, because the
|
|
||||||
// cache is full.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ClientMmap evictedMmap = entry.getValue();
|
|
||||||
Key evictedKey = new Key(evictedMmap.getBlock(),
|
|
||||||
evictedMmap.getDatanodeID());
|
|
||||||
mmaps.remove(evictedKey);
|
|
||||||
evictedMmap.unmap();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new mmap object.
|
|
||||||
*
|
|
||||||
* NOTE: you must call this function with the lock held.
|
|
||||||
*
|
|
||||||
* @param key The key which describes this mmap.
|
|
||||||
* @param in The input stream to use to create the mmap.
|
|
||||||
* @return The new mmap object, or null if there were
|
|
||||||
* insufficient resources.
|
|
||||||
* @throws IOException If there was an I/O error creating the mmap.
|
|
||||||
*/
|
|
||||||
private ClientMmap create(Key key, FileInputStream in) throws IOException {
|
|
||||||
if (mmaps.size() + 1 > cacheSize) {
|
|
||||||
if (!evictOne()) {
|
|
||||||
LOG.warn("mmap cache is full (with " + cacheSize + " elements) and " +
|
|
||||||
"nothing is evictable. Ignoring request for mmap with " +
|
|
||||||
"datanodeID=" + key.datanode + ", " + "block=" + key.block);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Create the condition variable that other threads may wait on.
|
|
||||||
Waitable<ClientMmap> waitable =
|
|
||||||
new Waitable<ClientMmap>(lock.newCondition());
|
|
||||||
mmaps.put(key, waitable);
|
|
||||||
// Load the entry
|
|
||||||
boolean success = false;
|
|
||||||
ClientMmap mmap = null;
|
|
||||||
try {
|
|
||||||
try {
|
|
||||||
lock.unlock();
|
|
||||||
mmap = ClientMmap.load(this, in, key.block, key.datanode);
|
|
||||||
} finally {
|
|
||||||
lock.lock();
|
|
||||||
}
|
|
||||||
if (cacheCleaner == null) {
|
|
||||||
cacheCleaner = new CacheCleaner(this);
|
|
||||||
ScheduledFuture<?> future =
|
|
||||||
executor.scheduleAtFixedRate(cacheCleaner,
|
|
||||||
timeoutNs, timeoutNs / runsPerTimeout, TimeUnit.NANOSECONDS);
|
|
||||||
cacheCleaner.setFuture(future);
|
|
||||||
}
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
LOG.warn("failed to create mmap for datanodeID=" + key.datanode +
|
|
||||||
", " + "block=" + key.block);
|
|
||||||
mmaps.remove(key);
|
|
||||||
}
|
|
||||||
waitable.provide(mmap);
|
|
||||||
}
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.info("created a new ClientMmap for block " + key.block +
|
|
||||||
" on datanode " + key.datanode);
|
|
||||||
}
|
|
||||||
return mmap;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get or create an mmap region.
|
|
||||||
*
|
|
||||||
* @param node The DataNode that owns the block for this mmap region.
|
|
||||||
* @param block The block ID, block pool ID, and generation stamp of
|
|
||||||
* the block we want to read.
|
|
||||||
* @param in An open file for this block. This stream is only used
|
|
||||||
* if we have to create a new mmap; if we use an
|
|
||||||
* existing one, it is ignored.
|
|
||||||
*
|
|
||||||
* @return The client mmap region.
|
|
||||||
*/
|
|
||||||
public ClientMmap fetch(DatanodeID datanodeID, ExtendedBlock block,
|
|
||||||
FileInputStream in) throws IOException, InterruptedException {
|
|
||||||
LOG.debug("fetching mmap with datanodeID=" + datanodeID + ", " +
|
|
||||||
"block=" + block);
|
|
||||||
Key key = new Key(block, datanodeID);
|
|
||||||
ClientMmap mmap = null;
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
if (closed) {
|
|
||||||
throw new IOException("ClientMmapManager is closed.");
|
|
||||||
}
|
|
||||||
while (mmap == null) {
|
|
||||||
Waitable<ClientMmap> entry = mmaps.get(key);
|
|
||||||
if (entry == null) {
|
|
||||||
return create(key, in);
|
|
||||||
}
|
|
||||||
mmap = entry.await();
|
|
||||||
}
|
|
||||||
if (mmap.ref() == 1) {
|
|
||||||
// When going from nobody using the mmap (ref = 0) to somebody
|
|
||||||
// using the mmap (ref = 1), we must make the mmap un-evictable.
|
|
||||||
evictable.remove(mmap.getLastEvictableTimeNs());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("reusing existing mmap with datanodeID=" + datanodeID +
|
|
||||||
", " + "block=" + block);
|
|
||||||
}
|
|
||||||
return mmap;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Make an mmap evictable.
|
|
||||||
*
|
|
||||||
* When an mmap is evictable, it may be removed from the cache if necessary.
|
|
||||||
* mmaps can only be evictable if nobody is using them.
|
|
||||||
*
|
|
||||||
* @param mmap The mmap to make evictable.
|
|
||||||
*/
|
|
||||||
void makeEvictable(ClientMmap mmap) {
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
if (closed) {
|
|
||||||
// If this ClientMmapManager is closed, then don't bother with the
|
|
||||||
// cache; just close the mmap.
|
|
||||||
mmap.unmap();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
long now = System.nanoTime();
|
|
||||||
while (evictable.containsKey(now)) {
|
|
||||||
now++;
|
|
||||||
}
|
|
||||||
mmap.setLastEvictableTimeNs(now);
|
|
||||||
evictable.put(now, mmap);
|
|
||||||
} finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
try {
|
|
||||||
lock.lock();
|
|
||||||
closed = true;
|
|
||||||
IOUtils.cleanup(LOG, cacheCleaner);
|
|
||||||
|
|
||||||
// Unmap all the mmaps that nobody is using.
|
|
||||||
// The ones which are in use will be unmapped just as soon as people stop
|
|
||||||
// using them.
|
|
||||||
evictStaleEntries(Long.MAX_VALUE);
|
|
||||||
|
|
||||||
executor.shutdown();
|
|
||||||
} finally {
|
|
||||||
lock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
public interface ClientMmapVisitor {
|
|
||||||
void accept(ClientMmap mmap);
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
|
||||||
public synchronized void visitMmaps(ClientMmapVisitor visitor)
|
|
||||||
throws InterruptedException {
|
|
||||||
for (Waitable<ClientMmap> entry : mmaps.values()) {
|
|
||||||
visitor.accept(entry.await());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void visitEvictable(ClientMmapVisitor visitor)
|
|
||||||
throws InterruptedException {
|
|
||||||
for (ClientMmap mmap : evictable.values()) {
|
|
||||||
visitor.accept(mmap);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,881 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.client;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.MappedByteBuffer;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.concurrent.ScheduledFuture;
|
||||||
|
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.locks.Condition;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.ExtendedBlockId;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.ipc.RetriableException;
|
||||||
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.apache.hadoop.util.Waitable;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The ShortCircuitCache tracks things which the client needs to access
|
||||||
|
* HDFS block files via short-circuit.
|
||||||
|
*
|
||||||
|
* These things include: memory-mapped regions, file descriptors, and shared
|
||||||
|
* memory areas for communicating with the DataNode.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class ShortCircuitCache implements Closeable {
|
||||||
|
public static final Log LOG = LogFactory.getLog(ShortCircuitCache.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expiry thread which makes sure that the file descriptors get closed
|
||||||
|
* after a while.
|
||||||
|
*/
|
||||||
|
private class CacheCleaner implements Runnable, Closeable {
|
||||||
|
private ScheduledFuture<?> future;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the CacheCleaner thread.
|
||||||
|
*
|
||||||
|
* Whenever a thread requests a ShortCircuitReplica object, we will make
|
||||||
|
* sure it gets one. That ShortCircuitReplica object can then be re-used
|
||||||
|
* when another thread requests a ShortCircuitReplica object for the same
|
||||||
|
* block. So in that sense, there is no maximum size to the cache.
|
||||||
|
*
|
||||||
|
* However, when a ShortCircuitReplica object is unreferenced by the
|
||||||
|
* thread(s) that are using it, it becomes evictable. There are two
|
||||||
|
* separate eviction lists-- one for mmaped objects, and another for
|
||||||
|
* non-mmaped objects. We do this in order to avoid having the regular
|
||||||
|
* files kick the mmaped files out of the cache too quickly. Reusing
|
||||||
|
* an already-existing mmap gives a huge performance boost, since the
|
||||||
|
* page table entries don't have to be re-populated. Both the mmap
|
||||||
|
* and non-mmap evictable lists have maximum sizes and maximum lifespans.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
ShortCircuitCache.this.lock.lock();
|
||||||
|
try {
|
||||||
|
if (ShortCircuitCache.this.closed) return;
|
||||||
|
long curMs = Time.monotonicNow();
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(this + ": cache cleaner running at " + curMs);
|
||||||
|
}
|
||||||
|
|
||||||
|
int numDemoted = demoteOldEvictableMmaped(curMs);
|
||||||
|
int numPurged = 0;
|
||||||
|
Long evictionTimeNs = Long.valueOf(0);
|
||||||
|
while (true) {
|
||||||
|
Entry<Long, ShortCircuitReplica> entry =
|
||||||
|
evictableMmapped.ceilingEntry(evictionTimeNs);
|
||||||
|
if (entry == null) break;
|
||||||
|
evictionTimeNs = entry.getKey();
|
||||||
|
long evictionTimeMs =
|
||||||
|
TimeUnit.MILLISECONDS.convert(evictionTimeNs, TimeUnit.NANOSECONDS);
|
||||||
|
if (evictionTimeMs + maxNonMmappedEvictableLifespanMs >= curMs) break;
|
||||||
|
ShortCircuitReplica replica = entry.getValue();
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("CacheCleaner: purging " + replica + ": " +
|
||||||
|
StringUtils.getStackTrace(Thread.currentThread()));
|
||||||
|
}
|
||||||
|
purge(replica);
|
||||||
|
numPurged++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(this + ": finishing cache cleaner run started at " +
|
||||||
|
curMs + ". Demoted " + numDemoted + " mmapped replicas; " +
|
||||||
|
"purged " + numPurged + " replicas.");
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
ShortCircuitCache.this.lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (future != null) {
|
||||||
|
future.cancel(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFuture(ScheduledFuture<?> future) {
|
||||||
|
this.future = future;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the rate at which this cleaner thread should be scheduled.
|
||||||
|
*
|
||||||
|
* We do this by taking the minimum expiration time and dividing by 4.
|
||||||
|
*
|
||||||
|
* @return the rate in milliseconds at which this thread should be
|
||||||
|
* scheduled.
|
||||||
|
*/
|
||||||
|
public long getRateInMs() {
|
||||||
|
long minLifespanMs =
|
||||||
|
Math.min(maxNonMmappedEvictableLifespanMs,
|
||||||
|
maxEvictableMmapedLifespanMs);
|
||||||
|
long sampleTimeMs = minLifespanMs / 4;
|
||||||
|
return (sampleTimeMs < 1) ? 1 : sampleTimeMs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
  /**
   * Callback used by the cache to construct a replica when no usable
   * cached one exists.
   */
  public interface ShortCircuitReplicaCreator {
    /**
     * Attempt to create a ShortCircuitReplica object.
     *
     * This callback will be made without holding any locks.
     *
     * @return a non-null ShortCircuitReplicaInfo object.
     */
    ShortCircuitReplicaInfo createShortCircuitReplicaInfo();
  }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lock protecting the cache.
|
||||||
|
*/
|
||||||
|
private final ReentrantLock lock = new ReentrantLock();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The executor service that runs the cacheCleaner.
|
||||||
|
*/
|
||||||
|
private final ScheduledThreadPoolExecutor executor
|
||||||
|
= new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
|
||||||
|
setDaemon(true).setNameFormat("ShortCircuitCache Cleaner").
|
||||||
|
build());
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A map containing all ShortCircuitReplicaInfo objects, organized by Key.
|
||||||
|
* ShortCircuitReplicaInfo objects may contain a replica, or an InvalidToken
|
||||||
|
* exception.
|
||||||
|
*/
|
||||||
|
private final HashMap<ExtendedBlockId, Waitable<ShortCircuitReplicaInfo>>
|
||||||
|
replicaInfoMap = new HashMap<ExtendedBlockId,
|
||||||
|
Waitable<ShortCircuitReplicaInfo>>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The CacheCleaner. We don't create this and schedule it until it becomes
|
||||||
|
* necessary.
|
||||||
|
*/
|
||||||
|
private CacheCleaner cacheCleaner;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tree of evictable elements.
|
||||||
|
*
|
||||||
|
* Maps (unique) insertion time in nanoseconds to the element.
|
||||||
|
*/
|
||||||
|
private final TreeMap<Long, ShortCircuitReplica> evictable =
|
||||||
|
new TreeMap<Long, ShortCircuitReplica>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum total size of the cache, including both mmapped and
|
||||||
|
* non-mmapped elements.
|
||||||
|
*/
|
||||||
|
private int maxTotalSize;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Non-mmaped elements older than this will be closed.
|
||||||
|
*/
|
||||||
|
private long maxNonMmappedEvictableLifespanMs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tree of mmaped evictable elements.
|
||||||
|
*
|
||||||
|
* Maps (unique) insertion time in nanoseconds to the element.
|
||||||
|
*/
|
||||||
|
private final TreeMap<Long, ShortCircuitReplica> evictableMmapped =
|
||||||
|
new TreeMap<Long, ShortCircuitReplica>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of mmaped evictable elements.
|
||||||
|
*/
|
||||||
|
private int maxEvictableMmapedSize;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mmaped elements older than this will be closed.
|
||||||
|
*/
|
||||||
|
private final long maxEvictableMmapedLifespanMs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The minimum number of milliseconds we'll wait after an unsuccessful
|
||||||
|
* mmap attempt before trying again.
|
||||||
|
*/
|
||||||
|
private final long mmapRetryTimeoutMs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How long we will keep replicas in the cache before declaring them
|
||||||
|
* to be stale.
|
||||||
|
*/
|
||||||
|
private final long staleThresholdMs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if the ShortCircuitCache is closed.
|
||||||
|
*/
|
||||||
|
private boolean closed = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of existing mmaps associated with this cache.
|
||||||
|
*/
|
||||||
|
private int outstandingMmapCount = 0;
|
||||||
|
|
||||||
|
  /**
   * Create a {@link ShortCircuitCache} object from a {@link Configuration}
   *
   * @param conf configuration supplying cache size, expiry, retry-timeout
   *             and staleness settings (defaults from DFSConfigKeys).
   * @return a new ShortCircuitCache configured from {@code conf}.
   */
  public static ShortCircuitCache fromConf(Configuration conf) {
    return new ShortCircuitCache(
        conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY,
            DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT),
        conf.getLong(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
            DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT),
        conf.getInt(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
            DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT),
        conf.getLong(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
            DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT),
        conf.getLong(DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS,
            DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS_DEFAULT),
        conf.getLong(DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS,
            DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS_DEFAULT));
  }
|
||||||
|
|
||||||
|
  /**
   * Create a ShortCircuitCache.
   *
   * @param maxTotalSize Maximum combined size of the evictable and
   *          evictableMmapped lists; must be non-negative.
   * @param maxNonMmappedEvictableLifespanMs Lifespan in ms of non-mmapped
   *          evictable replicas; must be non-negative.
   * @param maxEvictableMmapedSize Maximum number of mmapped evictable
   *          replicas; must be non-negative.
   * @param maxEvictableMmapedLifespanMs Lifespan in ms of mmapped evictable
   *          replicas; must be non-negative.
   * @param mmapRetryTimeoutMs Minimum wait in ms after an unsuccessful mmap
   *          attempt before retrying (not validated here).
   * @param staleThresholdMs Time in ms after which a cached replica is
   *          considered stale (not validated here).
   */
  public ShortCircuitCache(int maxTotalSize, long maxNonMmappedEvictableLifespanMs,
      int maxEvictableMmapedSize, long maxEvictableMmapedLifespanMs,
      long mmapRetryTimeoutMs, long staleThresholdMs) {
    Preconditions.checkArgument(maxTotalSize >= 0);
    this.maxTotalSize = maxTotalSize;
    Preconditions.checkArgument(maxNonMmappedEvictableLifespanMs >= 0);
    this.maxNonMmappedEvictableLifespanMs = maxNonMmappedEvictableLifespanMs;
    Preconditions.checkArgument(maxEvictableMmapedSize >= 0);
    this.maxEvictableMmapedSize = maxEvictableMmapedSize;
    Preconditions.checkArgument(maxEvictableMmapedLifespanMs >= 0);
    this.maxEvictableMmapedLifespanMs = maxEvictableMmapedLifespanMs;
    this.mmapRetryTimeoutMs = mmapRetryTimeoutMs;
    this.staleThresholdMs = staleThresholdMs;
  }
|
||||||
|
|
||||||
|
  /** @return the minimum wait in ms between unsuccessful mmap attempts. */
  public long getMmapRetryTimeoutMs() {
    return mmapRetryTimeoutMs;
  }
|
||||||
|
|
||||||
|
  /** @return the time in ms after which a cached replica is considered stale. */
  public long getStaleThresholdMs() {
    return staleThresholdMs;
  }
|
||||||
|
|
||||||
|
  /**
   * Increment the reference count of a replica, and remove it from any free
   * list it may be in.
   *
   * You must hold the cache lock while calling this function.
   *
   * @param replica The replica to reference.  Its refCount must already be
   *                positive (a refCount of 0 means the replica is closed).
   */
  private void ref(ShortCircuitReplica replica) {
    // Re-acquiring the (reentrant) lock is harmless if the caller holds it.
    lock.lock();
    try {
      Preconditions.checkArgument(replica.refCount > 0,
          "can't ref " + replica + " because its refCount reached " +
          replica.refCount);
      Long evictableTimeNs = replica.getEvictableTimeNs();
      replica.refCount++;
      if (evictableTimeNs != null) {
        // The replica was on a free list; a referenced replica must not be.
        String removedFrom = removeEvictable(replica);
        if (LOG.isTraceEnabled()) {
          LOG.trace(this + ": " + removedFrom +
              " no longer contains " + replica + ". refCount " +
              (replica.refCount - 1) + " -> " + replica.refCount +
              StringUtils.getStackTrace(Thread.currentThread()));

        }
      } else if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": replica refCount " +
            (replica.refCount - 1) + " -> " + replica.refCount +
            StringUtils.getStackTrace(Thread.currentThread()));
      }
    } finally {
      lock.unlock();
    }
  }
|
||||||
|
|
||||||
|
  /**
   * Unreference a replica.
   *
   * When the count drops to 1 (only the cache itself holds a reference) the
   * replica is placed on an eviction list; when it drops to 0 the replica is
   * closed (it must already have been purged by then).
   *
   * You must hold the cache lock while calling this function.
   *
   * @param replica The replica being unreferenced.
   */
  void unref(ShortCircuitReplica replica) {
    lock.lock();
    try {
      String addedString = "";
      int newRefCount = --replica.refCount;
      if (newRefCount == 0) {
        // Close replica, since there are no remaining references to it.
        Preconditions.checkArgument(replica.purged,
            "Replica " + replica + " reached a refCount of 0 without " +
            "being purged");
        replica.close();
      } else if (newRefCount == 1) {
        Preconditions.checkState(null == replica.getEvictableTimeNs(),
            "Replica " + replica + " had a refCount higher than 1, " +
            "but was still evictable (evictableTimeNs = " +
            replica.getEvictableTimeNs() + ")");
        if (!replica.purged) {
          // Add the replica to the end of an eviction list.
          // Eviction lists are sorted by time.
          if (replica.hasMmap()) {
            insertEvictable(System.nanoTime(), replica, evictableMmapped);
            addedString = "added to evictableMmapped, ";
          } else {
            insertEvictable(System.nanoTime(), replica, evictable);
            addedString = "added to evictable, ";
          }
          // Adding an entry may push the lists over their size limits.
          trimEvictionMaps();
        }
      } else {
        Preconditions.checkArgument(replica.refCount >= 0,
            "replica's refCount went negative (refCount = " +
            replica.refCount + " for " + replica + ")");
      }
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": unref replica " + replica +
            ": " + addedString + " refCount " +
            (newRefCount + 1) + " -> " + newRefCount +
            StringUtils.getStackTrace(Thread.currentThread()));
      }
    } finally {
      lock.unlock();
    }
  }
|
||||||
|
|
||||||
|
  /**
   * Demote old evictable mmaps into the regular eviction map.
   *
   * A mmapped replica is demoted (its mmap released, then moved to the
   * non-mmapped eviction list) when it has exceeded its lifespan, or when
   * the mmapped list is at its maximum size.
   *
   * You must hold the cache lock while calling this function.
   *
   * @param now Current time in monotonic milliseconds.
   * @return    Number of replicas demoted.
   */
  private int demoteOldEvictableMmaped(long now) {
    int numDemoted = 0;
    boolean needMoreSpace = false;
    Long evictionTimeNs = Long.valueOf(0);

    while (true) {
      // Entries are keyed by insertion time, so ceilingEntry walks them
      // oldest-first.
      Entry<Long, ShortCircuitReplica> entry =
          evictableMmapped.ceilingEntry(evictionTimeNs);
      if (entry == null) break;
      evictionTimeNs = entry.getKey();
      long evictionTimeMs =
          TimeUnit.MILLISECONDS.convert(evictionTimeNs, TimeUnit.NANOSECONDS);
      if (evictionTimeMs + maxEvictableMmapedLifespanMs >= now) {
        if (evictableMmapped.size() < maxEvictableMmapedSize) {
          // Not expired and the list is within its size limit: done.
          break;
        }
        needMoreSpace = true;
      }
      ShortCircuitReplica replica = entry.getValue();
      if (LOG.isTraceEnabled()) {
        String rationale = needMoreSpace ? "because we need more space" :
            "because it's too old";
        LOG.trace("demoteOldEvictable: demoting " + replica + ": " +
            rationale + ": " +
            StringUtils.getStackTrace(Thread.currentThread()));
      }
      removeEvictable(replica, evictableMmapped);
      munmap(replica);
      insertEvictable(evictionTimeNs, replica, evictable);
      numDemoted++;
    }
    return numDemoted;
  }
|
||||||
|
|
||||||
|
  /**
   * Trim the eviction lists.
   *
   * Demotes expired mmapped entries, then purges the oldest evictable
   * replicas until the combined size of both lists is within maxTotalSize.
   * Non-mmapped entries are purged before mmapped ones.
   *
   * You must hold the cache lock while calling this function (purge and
   * demoteOldEvictableMmaped both require it).
   */
  private void trimEvictionMaps() {
    long now = Time.monotonicNow();
    demoteOldEvictableMmaped(now);

    while (true) {
      long evictableSize = evictable.size();
      long evictableMmappedSize = evictableMmapped.size();
      if (evictableSize + evictableMmappedSize <= maxTotalSize) {
        return;
      }
      ShortCircuitReplica replica;
      if (evictableSize == 0) {
        // Only fall back to purging mmapped replicas when no plain ones remain.
        replica = evictableMmapped.firstEntry().getValue();
      } else {
        replica = evictable.firstEntry().getValue();
      }
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": trimEvictionMaps is purging " +
            StringUtils.getStackTrace(Thread.currentThread()));
      }
      purge(replica);
    }
  }
|
||||||
|
|
||||||
|
  /**
   * Munmap a replica, updating outstandingMmapCount.
   *
   * NOTE(review): mutates outstandingMmapCount, so this presumably must be
   * called with the cache lock held -- confirm at call sites.
   *
   * @param replica  The replica to munmap.
   */
  private void munmap(ShortCircuitReplica replica) {
    replica.munmap();
    outstandingMmapCount--;
  }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove a replica from an evictable map.
|
||||||
|
*
|
||||||
|
* @param replica The replica to remove.
|
||||||
|
* @return The map it was removed from.
|
||||||
|
*/
|
||||||
|
private String removeEvictable(ShortCircuitReplica replica) {
|
||||||
|
if (replica.hasMmap()) {
|
||||||
|
removeEvictable(replica, evictableMmapped);
|
||||||
|
return "evictableMmapped";
|
||||||
|
} else {
|
||||||
|
removeEvictable(replica, evictable);
|
||||||
|
return "evictable";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
  /**
   * Remove a replica from an evictable map.
   *
   * The replica's evictable time is both the map key and the marker that it
   * is on a free list; it is cleared here.
   *
   * @param replica The replica to remove.
   * @param map     The map to remove it from.
   */
  private void removeEvictable(ShortCircuitReplica replica,
      TreeMap<Long, ShortCircuitReplica> map) {
    Long evictableTimeNs = replica.getEvictableTimeNs();
    Preconditions.checkNotNull(evictableTimeNs);
    ShortCircuitReplica removed = map.remove(evictableTimeNs);
    Preconditions.checkState(removed == replica,
        "failed to make " + replica + " unevictable");
    replica.setEvictableTimeNs(null);
  }
|
||||||
|
|
||||||
|
  /**
   * Insert a replica into an evictable map.
   *
   * If an element already exists with this eviction time, we add a nanosecond
   * to it until we find an unused key.
   *
   * @param evictionTimeNs The eviction time in absolute nanoseconds.
   * @param replica        The replica to insert.
   * @param map            The map to insert it into.
   */
  private void insertEvictable(Long evictionTimeNs,
      ShortCircuitReplica replica, TreeMap<Long, ShortCircuitReplica> map) {
    while (map.containsKey(evictionTimeNs)) {
      // Probe for a free key; autoboxing makes ++ create a new Long.
      evictionTimeNs++;
    }
    Preconditions.checkState(null == replica.getEvictableTimeNs());
    Long time = Long.valueOf(evictionTimeNs);
    replica.setEvictableTimeNs(time);
    map.put(time, replica);
  }
|
||||||
|
|
||||||
|
  /**
   * Purge a replica from the cache.
   *
   * This doesn't necessarily close the replica, since there may be
   * outstanding references to it.  However, it does mean the cache won't
   * hand it out to anyone after this.
   *
   * You must hold the cache lock while calling this function.
   *
   * @param replica The replica being removed.
   */
  private void purge(ShortCircuitReplica replica) {
    boolean removedFromInfoMap = false;
    String evictionMapName = null;
    Preconditions.checkArgument(!replica.purged);
    replica.purged = true;
    // Drop the replicaInfoMap entry only if it still points at this replica
    // and its load has completed.
    Waitable<ShortCircuitReplicaInfo> val = replicaInfoMap.get(replica.key);
    if (val != null) {
      ShortCircuitReplicaInfo info = val.getVal();
      if ((info != null) && (info.getReplica() == replica)) {
        replicaInfoMap.remove(replica.key);
        removedFromInfoMap = true;
      }
    }
    Long evictableTimeNs = replica.getEvictableTimeNs();
    if (evictableTimeNs != null) {
      evictionMapName = removeEvictable(replica);
    }
    if (LOG.isTraceEnabled()) {
      StringBuilder builder = new StringBuilder();
      // NOTE(review): the doubled ": " separator below looks unintentional.
      builder.append(this).append(": ").append(": removed ").
          append(replica).append(" from the cache.");
      if (removedFromInfoMap) {
        builder.append("  Removed from the replicaInfoMap.");
      }
      if (evictionMapName != null) {
        builder.append("  Removed from ").append(evictionMapName);
      }
      LOG.trace(builder.toString());
    }
    // Drop the cache's own reference; this may close the replica.
    unref(replica);
  }
|
||||||
|
|
||||||
|
  /**
   * Fetch or create a replica.
   *
   * You must hold the cache lock while calling this function.
   *
   * @param key          Key to use for lookup.
   * @param creator      Replica creator callback.  Will be called without
   *                     the cache lock being held.
   *
   * @return             Null if no replica could be found or created.
   *                     The replica, otherwise.
   */
  public ShortCircuitReplicaInfo fetchOrCreate(ExtendedBlockId key,
      ShortCircuitReplicaCreator creator) {
    Waitable<ShortCircuitReplicaInfo> newWaitable = null;
    lock.lock();
    try {
      ShortCircuitReplicaInfo info = null;
      // do { ... } while (false) so that 'continue' below acts as a break:
      // when fetch() throws RetriableException the stale entry has been
      // purged, and we fall through to load the replica ourselves.
      do {
        if (closed) {
          if (LOG.isTraceEnabled()) {
            LOG.trace(this + ": can't fetchOrCreate " + key +
                " because the cache is closed.");
          }
          return null;
        }
        Waitable<ShortCircuitReplicaInfo> waitable = replicaInfoMap.get(key);
        if (waitable != null) {
          try {
            info = fetch(key, waitable);
          } catch (RetriableException e) {
            if (LOG.isDebugEnabled()) {
              LOG.debug(this + ": retrying " + e.getMessage());
            }
            continue;
          }
        }
      } while (false);
      if (info != null) return info;
      // We need to load the replica ourselves.
      newWaitable = new Waitable<ShortCircuitReplicaInfo>(lock.newCondition());
      replicaInfoMap.put(key, newWaitable);
    } finally {
      lock.unlock();
    }
    // Load outside the lock; create() re-acquires it to publish the result.
    return create(key, creator, newWaitable);
  }
|
||||||
|
|
||||||
|
  /**
   * Fetch an existing ReplicaInfo object.
   *
   * @param key       The key that we're using.
   * @param waitable  The waitable object to wait on.
   * @return          The existing ReplicaInfo object, or null if there is
   *                  none.
   *
   * @throws RetriableException   If the caller needs to retry.
   */
  private ShortCircuitReplicaInfo fetch(ExtendedBlockId key,
      Waitable<ShortCircuitReplicaInfo> waitable) throws RetriableException {
    // Another thread is already in the process of loading this
    // ShortCircuitReplica.  So we simply wait for it to complete.
    ShortCircuitReplicaInfo info;
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": found waitable for " + key);
      }
      info = waitable.await();
    } catch (InterruptedException e) {
      LOG.info(this + ": interrupted while waiting for " + key);
      // Restore the interrupt flag for callers further up the stack.
      Thread.currentThread().interrupt();
      throw new RetriableException("interrupted");
    }
    if (info.getInvalidTokenException() != null) {
      LOG.warn(this + ": could not get " + key + " due to InvalidToken " +
          "exception.", info.getInvalidTokenException());
      return info;
    }
    ShortCircuitReplica replica = info.getReplica();
    if (replica == null) {
      LOG.warn(this + ": failed to get " + key);
      return info;
    }
    if (replica.purged) {
      // Ignore replicas that have already been purged from the cache.
      throw new RetriableException("Ignoring purged replica " +
          replica + ". Retrying.");
    }
    // Check if the replica is stale before using it.
    // If it is, purge it and retry.
    if (replica.isStale()) {
      LOG.info(this + ": got stale replica " + replica + ". Removing " +
          "this replica from the replicaInfoMap and retrying.");
      // Remove the cache's reference to the replica.  This may or may not
      // trigger a close.
      purge(replica);
      throw new RetriableException("ignoring stale replica " + replica);
    }
    ref(replica);
    return info;
  }
|
||||||
|
|
||||||
|
  /**
   * Load a new replica via the creator callback and publish the result to
   * any threads waiting on {@code newWaitable}.
   *
   * The creator is invoked without the cache lock held.
   *
   * @param key          Key the replica is being loaded for.
   * @param creator      Callback that performs the actual load.
   * @param newWaitable  The waitable previously installed in replicaInfoMap
   *                     for this key.
   * @return             The (non-null) ShortCircuitReplicaInfo; on failure it
   *                     carries no replica and possibly an InvalidToken.
   */
  private ShortCircuitReplicaInfo create(ExtendedBlockId key,
      ShortCircuitReplicaCreator creator,
      Waitable<ShortCircuitReplicaInfo> newWaitable) {
    // Handle loading a new replica.
    ShortCircuitReplicaInfo info = null;
    try {
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": loading " + key);
      }
      info = creator.createShortCircuitReplicaInfo();
    } catch (RuntimeException e) {
      // A failed creator must not leave waiters hanging; fall through to the
      // empty-info path below.
      LOG.warn(this + ": failed to load " + key, e);
    }
    if (info == null) info = new ShortCircuitReplicaInfo();
    lock.lock();
    try {
      if (info.getReplica() != null) {
        // On success, make sure the cache cleaner thread is running.
        if (LOG.isTraceEnabled()) {
          LOG.trace(this + ": successfully loaded " + info.getReplica());
        }
        startCacheCleanerThreadIfNeeded();
        // Note: new ShortCircuitReplicas start with a refCount of 2,
        // indicating that both this cache and whoever requested the
        // creation of the replica hold a reference.  So we don't need
        // to increment the reference count here.
      } else {
        // On failure, remove the waitable from the replicaInfoMap.
        Waitable<ShortCircuitReplicaInfo> waitableInMap = replicaInfoMap.get(key);
        if (waitableInMap == newWaitable) replicaInfoMap.remove(key);
        if (info.getInvalidTokenException() != null) {
          LOG.warn(this + ": could not load " + key + " due to InvalidToken " +
              "exception.", info.getInvalidTokenException());
        } else {
          LOG.warn(this + ": failed to load " + key);
        }
      }
      // Wake up every thread blocked in fetch() on this waitable.
      newWaitable.provide(info);
    } finally {
      lock.unlock();
    }
    return info;
  }
|
||||||
|
|
||||||
|
  /**
   * Lazily create and schedule the CacheCleaner the first time a replica is
   * successfully loaded.  Must be called with the cache lock held
   * (presumably -- it is called from create() under the lock; confirm).
   */
  private void startCacheCleanerThreadIfNeeded() {
    if (cacheCleaner == null) {
      cacheCleaner = new CacheCleaner();
      long rateMs = cacheCleaner.getRateInMs();
      ScheduledFuture<?> future =
          executor.scheduleAtFixedRate(cacheCleaner, rateMs, rateMs,
              TimeUnit.MILLISECONDS);
      // Keep the future so CacheCleaner.close() can cancel the schedule.
      cacheCleaner.setFuture(future);
      if (LOG.isDebugEnabled()) {
        LOG.debug(this + ": starting cache cleaner thread which will run " +
          "every " + rateMs + " ms");
      }
    }
  }
|
||||||
|
|
||||||
|
ClientMmap getOrCreateClientMmap(ShortCircuitReplica replica) {
|
||||||
|
Condition newCond;
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
while (replica.mmapData != null) {
|
||||||
|
if (replica.mmapData instanceof ClientMmap) {
|
||||||
|
ref(replica);
|
||||||
|
ClientMmap clientMmap = (ClientMmap)replica.mmapData;
|
||||||
|
clientMmap.ref();
|
||||||
|
return clientMmap;
|
||||||
|
} else if (replica.mmapData instanceof Long) {
|
||||||
|
long lastAttemptTimeMs = (Long)replica.mmapData;
|
||||||
|
long delta = Time.monotonicNow() - lastAttemptTimeMs;
|
||||||
|
if (delta < staleThresholdMs) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": can't create client mmap for " +
|
||||||
|
replica + " because we failed to " +
|
||||||
|
"create one just " + delta + "ms ago.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + ": retrying client mmap for " + replica +
|
||||||
|
", " + delta + " ms after the previous failure.");
|
||||||
|
}
|
||||||
|
} else if (replica.mmapData instanceof Condition) {
|
||||||
|
Condition cond = (Condition)replica.mmapData;
|
||||||
|
cond.awaitUninterruptibly();
|
||||||
|
} else {
|
||||||
|
Preconditions.checkState(false, "invalid mmapData type " +
|
||||||
|
replica.mmapData.getClass().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newCond = lock.newCondition();
|
||||||
|
replica.mmapData = newCond;
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
MappedByteBuffer map = replica.loadMmapInternal();
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
if (map == null) {
|
||||||
|
replica.mmapData = Long.valueOf(Time.monotonicNow());
|
||||||
|
newCond.signalAll();
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
ClientMmap clientMmap = new ClientMmap(replica, map);
|
||||||
|
outstandingMmapCount++;
|
||||||
|
replica.mmapData = clientMmap;
|
||||||
|
ref(replica);
|
||||||
|
newCond.signalAll();
|
||||||
|
return clientMmap;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the cache and free all associated resources.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
lock.lock();
|
||||||
|
if (closed) return;
|
||||||
|
closed = true;
|
||||||
|
LOG.info(this + ": closing");
|
||||||
|
maxNonMmappedEvictableLifespanMs = 0;
|
||||||
|
maxEvictableMmapedSize = 0;
|
||||||
|
// Close and join cacheCleaner thread.
|
||||||
|
IOUtils.cleanup(LOG, cacheCleaner);
|
||||||
|
// Purge all replicas.
|
||||||
|
while (true) {
|
||||||
|
Entry<Long, ShortCircuitReplica> entry = evictable.firstEntry();
|
||||||
|
if (entry == null) break;
|
||||||
|
purge(entry.getValue());
|
||||||
|
}
|
||||||
|
while (true) {
|
||||||
|
Entry<Long, ShortCircuitReplica> entry = evictableMmapped.firstEntry();
|
||||||
|
if (entry == null) break;
|
||||||
|
purge(entry.getValue());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting // ONLY for testing
|
||||||
|
public interface CacheVisitor {
|
||||||
|
void visit(int numOutstandingMmaps,
|
||||||
|
Map<ExtendedBlockId, ShortCircuitReplica> replicas,
|
||||||
|
Map<ExtendedBlockId, InvalidToken> failedLoads,
|
||||||
|
Map<Long, ShortCircuitReplica> evictable,
|
||||||
|
Map<Long, ShortCircuitReplica> evictableMmapped);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting // ONLY for testing
|
||||||
|
public void accept(CacheVisitor visitor) {
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
Map<ExtendedBlockId, ShortCircuitReplica> replicas =
|
||||||
|
new HashMap<ExtendedBlockId, ShortCircuitReplica>();
|
||||||
|
Map<ExtendedBlockId, InvalidToken> failedLoads =
|
||||||
|
new HashMap<ExtendedBlockId, InvalidToken>();
|
||||||
|
for (Entry<ExtendedBlockId, Waitable<ShortCircuitReplicaInfo>> entry :
|
||||||
|
replicaInfoMap.entrySet()) {
|
||||||
|
Waitable<ShortCircuitReplicaInfo> waitable = entry.getValue();
|
||||||
|
if (waitable.hasVal()) {
|
||||||
|
if (waitable.getVal().getReplica() != null) {
|
||||||
|
replicas.put(entry.getKey(), waitable.getVal().getReplica());
|
||||||
|
} else {
|
||||||
|
// The exception may be null here, indicating a failed load that
|
||||||
|
// isn't the result of an invalid block token.
|
||||||
|
failedLoads.put(entry.getKey(),
|
||||||
|
waitable.getVal().getInvalidTokenException());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append("visiting ").append(visitor.getClass().getName()).
|
||||||
|
append("with outstandingMmapCount=").append(outstandingMmapCount).
|
||||||
|
append(", replicas=");
|
||||||
|
String prefix = "";
|
||||||
|
for (Entry<ExtendedBlockId, ShortCircuitReplica> entry : replicas.entrySet()) {
|
||||||
|
builder.append(prefix).append(entry.getValue());
|
||||||
|
prefix = ",";
|
||||||
|
}
|
||||||
|
prefix = "";
|
||||||
|
builder.append(", failedLoads=");
|
||||||
|
for (Entry<ExtendedBlockId, InvalidToken> entry : failedLoads.entrySet()) {
|
||||||
|
builder.append(prefix).append(entry.getValue());
|
||||||
|
prefix = ",";
|
||||||
|
}
|
||||||
|
prefix = "";
|
||||||
|
builder.append(", evictable=");
|
||||||
|
for (Entry<Long, ShortCircuitReplica> entry : evictable.entrySet()) {
|
||||||
|
builder.append(prefix).append(entry.getKey()).
|
||||||
|
append(":").append(entry.getValue());
|
||||||
|
prefix = ",";
|
||||||
|
}
|
||||||
|
prefix = "";
|
||||||
|
builder.append(", evictableMmapped=");
|
||||||
|
for (Entry<Long, ShortCircuitReplica> entry : evictableMmapped.entrySet()) {
|
||||||
|
builder.append(prefix).append(entry.getKey()).
|
||||||
|
append(":").append(entry.getValue());
|
||||||
|
prefix = ",";
|
||||||
|
}
|
||||||
|
LOG.debug(builder.toString());
|
||||||
|
}
|
||||||
|
visitor.visit(outstandingMmapCount, replicas, failedLoads,
|
||||||
|
evictable, evictableMmapped);
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "ShortCircuitCache(0x" +
|
||||||
|
Integer.toHexString(System.identityHashCode(this)) + ")";
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,268 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.client;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.MappedByteBuffer;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.nio.channels.FileChannel.MapMode;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hdfs.ExtendedBlockId;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A ShortCircuitReplica object contains file descriptors for a block that
|
||||||
|
* we are reading via short-circuit local reads.
|
||||||
|
*
|
||||||
|
* The file descriptors can be shared between multiple threads because
|
||||||
|
* all the operations we perform are stateless-- i.e., we use pread
|
||||||
|
* instead of read, to avoid using the shared position state.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class ShortCircuitReplica {
|
||||||
|
public static final Log LOG = LogFactory.getLog(ShortCircuitCache.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Identifies this ShortCircuitReplica object.
|
||||||
|
*/
|
||||||
|
final ExtendedBlockId key;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The block data input stream.
|
||||||
|
*/
|
||||||
|
private final FileInputStream dataStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The block metadata input stream.
|
||||||
|
*
|
||||||
|
* TODO: make this nullable if the file has no checksums on disk.
|
||||||
|
*/
|
||||||
|
private final FileInputStream metaStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Block metadata header.
|
||||||
|
*/
|
||||||
|
private final BlockMetadataHeader metaHeader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The cache we belong to.
|
||||||
|
*/
|
||||||
|
private final ShortCircuitCache cache;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Monotonic time at which the replica was created.
|
||||||
|
*/
|
||||||
|
private final long creationTimeMs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Current mmap state.
|
||||||
|
*
|
||||||
|
* Protected by the cache lock.
|
||||||
|
*/
|
||||||
|
Object mmapData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if this replica has been purged from the cache; false otherwise.
|
||||||
|
*
|
||||||
|
* Protected by the cache lock.
|
||||||
|
*/
|
||||||
|
boolean purged = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of external references to this replica. Replicas are referenced
|
||||||
|
* by the cache, BlockReaderLocal instances, and by ClientMmap instances.
|
||||||
|
* The number starts at 2 because when we create a replica, it is referenced
|
||||||
|
* by both the cache and the requester.
|
||||||
|
*
|
||||||
|
* Protected by the cache lock.
|
||||||
|
*/
|
||||||
|
int refCount = 2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The monotonic time in nanoseconds at which the replica became evictable, or
|
||||||
|
* null if it is not evictable.
|
||||||
|
*
|
||||||
|
* Protected by the cache lock.
|
||||||
|
*/
|
||||||
|
private Long evictableTimeNs = null;
|
||||||
|
|
||||||
|
public ShortCircuitReplica(ExtendedBlockId key,
|
||||||
|
FileInputStream dataStream, FileInputStream metaStream,
|
||||||
|
ShortCircuitCache cache, long creationTimeMs) throws IOException {
|
||||||
|
this.key = key;
|
||||||
|
this.dataStream = dataStream;
|
||||||
|
this.metaStream = metaStream;
|
||||||
|
this.metaHeader =
|
||||||
|
BlockMetadataHeader.preadHeader(metaStream.getChannel());
|
||||||
|
if (metaHeader.getVersion() != 1) {
|
||||||
|
throw new IOException("invalid metadata header version " +
|
||||||
|
metaHeader.getVersion() + ". Can only handle version 1.");
|
||||||
|
}
|
||||||
|
this.cache = cache;
|
||||||
|
this.creationTimeMs = creationTimeMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decrement the reference count.
|
||||||
|
*/
|
||||||
|
public void unref() {
|
||||||
|
cache.unref(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the replica is stale.
|
||||||
|
*
|
||||||
|
* Must be called with the cache lock held.
|
||||||
|
*/
|
||||||
|
boolean isStale() {
|
||||||
|
long deltaMs = Time.monotonicNow() - creationTimeMs;
|
||||||
|
long staleThresholdMs = cache.getStaleThresholdMs();
|
||||||
|
if (deltaMs > staleThresholdMs) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + " is stale because it's " + deltaMs +
|
||||||
|
" ms old, and staleThresholdMs = " + staleThresholdMs);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace(this + " is not stale because it's only " + deltaMs +
|
||||||
|
" ms old, and staleThresholdMs = " + staleThresholdMs);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the replica has an associated mmap that has been fully loaded.
|
||||||
|
*
|
||||||
|
* Must be called with the cache lock held.
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
public boolean hasMmap() {
|
||||||
|
return ((mmapData != null) && (mmapData instanceof ClientMmap));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free the mmap associated with this replica.
|
||||||
|
*
|
||||||
|
* Must be called with the cache lock held.
|
||||||
|
*/
|
||||||
|
void munmap() {
|
||||||
|
ClientMmap clientMmap = (ClientMmap)mmapData;
|
||||||
|
NativeIO.POSIX.munmap(clientMmap.getMappedByteBuffer());
|
||||||
|
mmapData = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the replica.
|
||||||
|
*
|
||||||
|
* Must be called after there are no more references to the replica in the
|
||||||
|
* cache or elsewhere.
|
||||||
|
*/
|
||||||
|
void close() {
|
||||||
|
Preconditions.checkState(refCount == 0,
|
||||||
|
"tried to close replica with refCount " + refCount + ": " + this);
|
||||||
|
Preconditions.checkState(purged,
|
||||||
|
"tried to close unpurged replica " + this);
|
||||||
|
if (hasMmap()) munmap();
|
||||||
|
IOUtils.cleanup(LOG, dataStream, metaStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileInputStream getDataStream() {
|
||||||
|
return dataStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileInputStream getMetaStream() {
|
||||||
|
return metaStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockMetadataHeader getMetaHeader() {
|
||||||
|
return metaHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExtendedBlockId getKey() {
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClientMmap getOrCreateClientMmap() {
|
||||||
|
return cache.getOrCreateClientMmap(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
MappedByteBuffer loadMmapInternal() {
|
||||||
|
try {
|
||||||
|
FileChannel channel = dataStream.getChannel();
|
||||||
|
return channel.map(MapMode.READ_ONLY, 0, channel.size());
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn(this + ": mmap error", e);
|
||||||
|
return null;
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
LOG.warn(this + ": mmap error", e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the evictable time in nanoseconds.
|
||||||
|
*
|
||||||
|
* Note: you must hold the cache lock to call this function.
|
||||||
|
*
|
||||||
|
* @return the evictable time in nanoseconds.
|
||||||
|
*/
|
||||||
|
public Long getEvictableTimeNs() {
|
||||||
|
return evictableTimeNs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the evictable time in nanoseconds.
|
||||||
|
*
|
||||||
|
* Note: you must hold the cache lock to call this function.
|
||||||
|
*
|
||||||
|
* @param evictableTimeNs The evictable time in nanoseconds, or null
|
||||||
|
* to set no evictable time.
|
||||||
|
*/
|
||||||
|
void setEvictableTimeNs(Long evictableTimeNs) {
|
||||||
|
this.evictableTimeNs = evictableTimeNs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the replica to a string for debugging purposes.
|
||||||
|
* Note that we can't take the lock here.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new StringBuilder().append("ShortCircuitReplica{").
|
||||||
|
append("key=").append(key).
|
||||||
|
append(", metaHeader.version=").append(metaHeader.getVersion()).
|
||||||
|
append(", metaHeader.checksum=").append(metaHeader.getChecksum()).
|
||||||
|
append(", ident=").append("0x").
|
||||||
|
append(Integer.toHexString(System.identityHashCode(this))).
|
||||||
|
append(", creationTimeMs=").append(creationTimeMs).
|
||||||
|
append("}").toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,64 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.client;
|
||||||
|
|
||||||
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
|
|
||||||
|
public final class ShortCircuitReplicaInfo {
|
||||||
|
private final ShortCircuitReplica replica;
|
||||||
|
private final InvalidToken exc;
|
||||||
|
|
||||||
|
public ShortCircuitReplicaInfo() {
|
||||||
|
this.replica = null;
|
||||||
|
this.exc = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShortCircuitReplicaInfo(ShortCircuitReplica replica) {
|
||||||
|
this.replica = replica;
|
||||||
|
this.exc = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShortCircuitReplicaInfo(InvalidToken exc) {
|
||||||
|
this.replica = null;
|
||||||
|
this.exc = exc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ShortCircuitReplica getReplica() {
|
||||||
|
return replica;
|
||||||
|
}
|
||||||
|
|
||||||
|
public InvalidToken getInvalidTokenException() {
|
||||||
|
return exc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
String prefix = "";
|
||||||
|
builder.append("ShortCircuitReplicaInfo{");
|
||||||
|
if (replica != null) {
|
||||||
|
builder.append(prefix).append(replica);
|
||||||
|
prefix = ", ";
|
||||||
|
}
|
||||||
|
if (exc != null) {
|
||||||
|
builder.append(prefix).append(exc);
|
||||||
|
prefix = ", ";
|
||||||
|
}
|
||||||
|
builder.append("}");
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -121,7 +121,8 @@ public static enum Feature implements LayoutFeature {
|
||||||
ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid."
|
ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid."
|
||||||
+ " Use distinct StorageUuid per storage directory."),
|
+ " Use distinct StorageUuid per storage directory."),
|
||||||
ADD_LAYOUT_FLAGS(-50, "Add support for layout flags."),
|
ADD_LAYOUT_FLAGS(-50, "Add support for layout flags."),
|
||||||
CACHING(-51, "Support for cache pools and path-based caching");
|
CACHING(-51, "Support for cache pools and path-based caching"),
|
||||||
|
PROTOBUF_FORMAT(-52, "Use protobuf to serialize FSImage");
|
||||||
|
|
||||||
private final FeatureInfo info;
|
private final FeatureInfo info;
|
||||||
|
|
||||||
|
|
|
@ -103,9 +103,10 @@ public DatanodeProtocolClientSideTranslatorPB(InetSocketAddress nameNodeAddr,
|
||||||
private static DatanodeProtocolPB createNamenode(
|
private static DatanodeProtocolPB createNamenode(
|
||||||
InetSocketAddress nameNodeAddr, Configuration conf,
|
InetSocketAddress nameNodeAddr, Configuration conf,
|
||||||
UserGroupInformation ugi) throws IOException {
|
UserGroupInformation ugi) throws IOException {
|
||||||
return RPC.getProxy(DatanodeProtocolPB.class,
|
return RPC.getProtocolProxy(DatanodeProtocolPB.class,
|
||||||
RPC.getProtocolVersion(DatanodeProtocolPB.class), nameNodeAddr, ugi,
|
RPC.getProtocolVersion(DatanodeProtocolPB.class), nameNodeAddr, ugi,
|
||||||
conf, NetUtils.getSocketFactory(conf, DatanodeProtocolPB.class));
|
conf, NetUtils.getSocketFactory(conf, DatanodeProtocolPB.class),
|
||||||
|
org.apache.hadoop.ipc.Client.getPingInterval(conf), null).getProxy();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create a {@link NameNode} proxy */
|
/** Create a {@link NameNode} proxy */
|
||||||
|
|
|
@ -23,12 +23,16 @@
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InterruptedIOException;
|
import java.io.InterruptedIOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
|
||||||
|
@ -46,6 +50,10 @@
|
||||||
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
|
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
|
||||||
import org.apache.hadoop.security.token.delegation.DelegationKey;
|
import org.apache.hadoop.security.token.delegation.DelegationKey;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.protobuf.ByteString;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A HDFS specific delegation token secret manager.
|
* A HDFS specific delegation token secret manager.
|
||||||
* The secret manager is responsible for generating and accepting the password
|
* The secret manager is responsible for generating and accepting the password
|
||||||
|
@ -168,6 +176,44 @@ public synchronized void loadSecretManagerStateCompat(DataInput in)
|
||||||
serializerCompat.load(in);
|
serializerCompat.load(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class SecretManagerState {
|
||||||
|
public final SecretManagerSection section;
|
||||||
|
public final List<SecretManagerSection.DelegationKey> keys;
|
||||||
|
public final List<SecretManagerSection.PersistToken> tokens;
|
||||||
|
|
||||||
|
public SecretManagerState(
|
||||||
|
SecretManagerSection s,
|
||||||
|
List<SecretManagerSection.DelegationKey> keys,
|
||||||
|
List<SecretManagerSection.PersistToken> tokens) {
|
||||||
|
this.section = s;
|
||||||
|
this.keys = keys;
|
||||||
|
this.tokens = tokens;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void loadSecretManagerState(SecretManagerState state)
|
||||||
|
throws IOException {
|
||||||
|
Preconditions.checkState(!running,
|
||||||
|
"Can't load state from image in a running SecretManager.");
|
||||||
|
|
||||||
|
currentId = state.section.getCurrentId();
|
||||||
|
delegationTokenSequenceNumber = state.section.getTokenSequenceNumber();
|
||||||
|
for (SecretManagerSection.DelegationKey k : state.keys) {
|
||||||
|
addKey(new DelegationKey(k.getId(), k.getExpiryDate(), k.hasKey() ? k
|
||||||
|
.getKey().toByteArray() : null));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (SecretManagerSection.PersistToken t : state.tokens) {
|
||||||
|
DelegationTokenIdentifier id = new DelegationTokenIdentifier(new Text(
|
||||||
|
t.getOwner()), new Text(t.getRenewer()), new Text(t.getRealUser()));
|
||||||
|
id.setIssueDate(t.getIssueDate());
|
||||||
|
id.setMaxDate(t.getMaxDate());
|
||||||
|
id.setSequenceNumber(t.getSequenceNumber());
|
||||||
|
id.setMasterKeyId(t.getMasterKeyId());
|
||||||
|
addPersistedDelegationToken(id, t.getExpiryDate());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store the current state of the SecretManager for persistence
|
* Store the current state of the SecretManager for persistence
|
||||||
*
|
*
|
||||||
|
@ -180,6 +226,42 @@ public synchronized void saveSecretManagerStateCompat(DataOutputStream out,
|
||||||
serializerCompat.save(out, sdPath);
|
serializerCompat.save(out, sdPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public synchronized SecretManagerState saveSecretManagerState() {
|
||||||
|
SecretManagerSection s = SecretManagerSection.newBuilder()
|
||||||
|
.setCurrentId(currentId)
|
||||||
|
.setTokenSequenceNumber(delegationTokenSequenceNumber)
|
||||||
|
.setNumKeys(allKeys.size()).setNumTokens(currentTokens.size()).build();
|
||||||
|
ArrayList<SecretManagerSection.DelegationKey> keys = Lists
|
||||||
|
.newArrayListWithCapacity(allKeys.size());
|
||||||
|
ArrayList<SecretManagerSection.PersistToken> tokens = Lists
|
||||||
|
.newArrayListWithCapacity(currentTokens.size());
|
||||||
|
|
||||||
|
for (DelegationKey v : allKeys.values()) {
|
||||||
|
SecretManagerSection.DelegationKey.Builder b = SecretManagerSection.DelegationKey
|
||||||
|
.newBuilder().setId(v.getKeyId()).setExpiryDate(v.getExpiryDate());
|
||||||
|
if (v.getEncodedKey() != null) {
|
||||||
|
b.setKey(ByteString.copyFrom(v.getEncodedKey()));
|
||||||
|
}
|
||||||
|
keys.add(b.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Entry<DelegationTokenIdentifier, DelegationTokenInformation> e : currentTokens
|
||||||
|
.entrySet()) {
|
||||||
|
DelegationTokenIdentifier id = e.getKey();
|
||||||
|
SecretManagerSection.PersistToken.Builder b = SecretManagerSection.PersistToken
|
||||||
|
.newBuilder().setOwner(id.getOwner().toString())
|
||||||
|
.setRenewer(id.getRenewer().toString())
|
||||||
|
.setRealUser(id.getRealUser().toString())
|
||||||
|
.setIssueDate(id.getIssueDate()).setMaxDate(id.getMaxDate())
|
||||||
|
.setSequenceNumber(id.getSequenceNumber())
|
||||||
|
.setMasterKeyId(id.getMasterKeyId())
|
||||||
|
.setExpiryDate(e.getValue().getRenewDate());
|
||||||
|
tokens.add(b.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SecretManagerState(s, keys, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method is intended to be used only while reading edit logs.
|
* This method is intended to be used only while reading edit logs.
|
||||||
*
|
*
|
||||||
|
@ -431,4 +513,5 @@ private synchronized void loadAllKeys(DataInput in) throws IOException {
|
||||||
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -633,9 +633,11 @@ private boolean isGoodTarget(DatanodeStorageInfo storage,
|
||||||
// check the communication traffic of the target machine
|
// check the communication traffic of the target machine
|
||||||
if (considerLoad) {
|
if (considerLoad) {
|
||||||
double avgLoad = 0;
|
double avgLoad = 0;
|
||||||
int size = clusterMap.getNumOfLeaves();
|
if (stats != null) {
|
||||||
if (size != 0 && stats != null) {
|
int size = stats.getNumDatanodesInService();
|
||||||
avgLoad = (double)stats.getTotalLoad()/size;
|
if (size != 0) {
|
||||||
|
avgLoad = (double)stats.getTotalLoad()/size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (node.getXceiverCount() > (2.0 * avgLoad)) {
|
if (node.getXceiverCount() > (2.0 * avgLoad)) {
|
||||||
logNodeIsNotChosen(storage, "the node is too busy ");
|
logNodeIsNotChosen(storage, "the node is too busy ");
|
||||||
|
|
|
@ -18,7 +18,26 @@
|
||||||
|
|
||||||
package org.apache.hadoop.hdfs.server.common;
|
package org.apache.hadoop.hdfs.server.common;
|
||||||
|
|
||||||
import com.google.common.base.Charsets;
|
import static org.apache.hadoop.fs.CommonConfigurationKeys.DEFAULT_HADOOP_HTTP_STATIC_USER;
|
||||||
|
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_HTTP_STATIC_USER;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.net.Socket;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.servlet.ServletContext;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.jsp.JspWriter;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -27,10 +46,17 @@
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.BlockReader;
|
import org.apache.hadoop.hdfs.BlockReader;
|
||||||
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
||||||
|
import org.apache.hadoop.hdfs.ClientContext;
|
||||||
import org.apache.hadoop.hdfs.DFSClient;
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
|
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.*;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
||||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
|
||||||
|
@ -53,22 +79,7 @@
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.util.VersionInfo;
|
import org.apache.hadoop.util.VersionInfo;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import com.google.common.base.Charsets;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
import javax.servlet.jsp.JspWriter;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.DataInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.net.Socket;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.CommonConfigurationKeys.DEFAULT_HADOOP_HTTP_STATIC_USER;
|
|
||||||
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_HTTP_STATIC_USER;
|
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class JspHelper {
|
public class JspHelper {
|
||||||
|
@ -168,101 +179,97 @@ public static DatanodeInfo bestNode(LocatedBlocks blks, Configuration conf)
|
||||||
}
|
}
|
||||||
NodeRecord[] nodes = map.values().toArray(new NodeRecord[map.size()]);
|
NodeRecord[] nodes = map.values().toArray(new NodeRecord[map.size()]);
|
||||||
Arrays.sort(nodes, new NodeRecordComparator());
|
Arrays.sort(nodes, new NodeRecordComparator());
|
||||||
return bestNode(nodes, false, conf);
|
return bestNode(nodes, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DatanodeInfo bestNode(LocatedBlock blk, Configuration conf)
|
public static DatanodeInfo bestNode(LocatedBlock blk, Configuration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
DatanodeInfo[] nodes = blk.getLocations();
|
DatanodeInfo[] nodes = blk.getLocations();
|
||||||
return bestNode(nodes, true, conf);
|
return bestNode(nodes, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DatanodeInfo bestNode(DatanodeInfo[] nodes, boolean doRandom,
|
private static DatanodeInfo bestNode(DatanodeInfo[] nodes, boolean doRandom)
|
||||||
Configuration conf) throws IOException {
|
throws IOException {
|
||||||
TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
|
|
||||||
DatanodeInfo chosenNode = null;
|
|
||||||
int failures = 0;
|
|
||||||
Socket s = null;
|
|
||||||
int index = -1;
|
|
||||||
if (nodes == null || nodes.length == 0) {
|
if (nodes == null || nodes.length == 0) {
|
||||||
throw new IOException("No nodes contain this block");
|
throw new IOException("No nodes contain this block");
|
||||||
}
|
}
|
||||||
while (s == null) {
|
int l = 0;
|
||||||
if (chosenNode == null) {
|
while (l < nodes.length && !nodes[l].isDecommissioned()) {
|
||||||
do {
|
++l;
|
||||||
if (doRandom) {
|
|
||||||
index = DFSUtil.getRandom().nextInt(nodes.length);
|
|
||||||
} else {
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
chosenNode = nodes[index];
|
|
||||||
} while (deadNodes.contains(chosenNode));
|
|
||||||
}
|
|
||||||
chosenNode = nodes[index];
|
|
||||||
|
|
||||||
//just ping to check whether the node is alive
|
|
||||||
InetSocketAddress targetAddr = NetUtils.createSocketAddr(
|
|
||||||
chosenNode.getInfoAddr());
|
|
||||||
|
|
||||||
try {
|
|
||||||
s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
|
||||||
s.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
} catch (IOException e) {
|
|
||||||
deadNodes.add(chosenNode);
|
|
||||||
IOUtils.closeSocket(s);
|
|
||||||
s = null;
|
|
||||||
failures++;
|
|
||||||
}
|
|
||||||
if (failures == nodes.length)
|
|
||||||
throw new IOException("Could not reach the block containing the data. Please try again");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
s.close();
|
|
||||||
return chosenNode;
|
if (l == 0) {
|
||||||
|
throw new IOException("No active nodes contain this block");
|
||||||
|
}
|
||||||
|
|
||||||
|
int index = doRandom ? DFSUtil.getRandom().nextInt(l) : 0;
|
||||||
|
return nodes[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void streamBlockInAscii(InetSocketAddress addr, String poolId,
|
public static void streamBlockInAscii(InetSocketAddress addr, String poolId,
|
||||||
long blockId, Token<BlockTokenIdentifier> blockToken, long genStamp,
|
long blockId, Token<BlockTokenIdentifier> blockToken, long genStamp,
|
||||||
long blockSize, long offsetIntoBlock, long chunkSizeToView,
|
long blockSize, long offsetIntoBlock, long chunkSizeToView,
|
||||||
JspWriter out, Configuration conf, DFSClient.Conf dfsConf,
|
JspWriter out, final Configuration conf, DFSClient.Conf dfsConf,
|
||||||
DataEncryptionKey encryptionKey)
|
final DataEncryptionKey encryptionKey)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (chunkSizeToView == 0) return;
|
if (chunkSizeToView == 0) return;
|
||||||
Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
|
||||||
s.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
|
|
||||||
int amtToRead = (int)Math.min(chunkSizeToView, blockSize - offsetIntoBlock);
|
int amtToRead = (int)Math.min(chunkSizeToView, blockSize - offsetIntoBlock);
|
||||||
|
|
||||||
// Use the block name for file name.
|
BlockReader blockReader = new BlockReaderFactory(dfsConf).
|
||||||
String file = BlockReaderFactory.getFileName(addr, poolId, blockId);
|
setInetSocketAddress(addr).
|
||||||
BlockReader blockReader = BlockReaderFactory.newBlockReader(dfsConf, file,
|
setBlock(new ExtendedBlock(poolId, blockId, 0, genStamp)).
|
||||||
new ExtendedBlock(poolId, blockId, 0, genStamp), blockToken,
|
setFileName(BlockReaderFactory.getFileName(addr, poolId, blockId)).
|
||||||
offsetIntoBlock, amtToRead, true,
|
setBlockToken(blockToken).
|
||||||
"JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey),
|
setStartOffset(offsetIntoBlock).
|
||||||
new DatanodeID(addr.getAddress().getHostAddress(),
|
setLength(amtToRead).
|
||||||
addr.getHostName(), poolId, addr.getPort(), 0, 0, 0), null,
|
setVerifyChecksum(true).
|
||||||
null, null, false, CachingStrategy.newDefaultStrategy());
|
setClientName("JspHelper").
|
||||||
|
setClientCacheContext(ClientContext.getFromConf(conf)).
|
||||||
|
setDatanodeInfo(new DatanodeInfo(
|
||||||
|
new DatanodeID(addr.getAddress().getHostAddress(),
|
||||||
|
addr.getHostName(), poolId, addr.getPort(), 0, 0, 0))).
|
||||||
|
setCachingStrategy(CachingStrategy.newDefaultStrategy()).
|
||||||
|
setConfiguration(conf).
|
||||||
|
setRemotePeerFactory(new RemotePeerFactory() {
|
||||||
|
@Override
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr)
|
||||||
|
throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
||||||
|
try {
|
||||||
|
sock.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
peer = TcpPeerServer.peerFromSocketAndKey(sock, encryptionKey);
|
||||||
|
} finally {
|
||||||
|
if (peer == null) {
|
||||||
|
IOUtils.closeSocket(sock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}).
|
||||||
|
build();
|
||||||
|
|
||||||
final byte[] buf = new byte[amtToRead];
|
final byte[] buf = new byte[amtToRead];
|
||||||
int readOffset = 0;
|
try {
|
||||||
int retries = 2;
|
int readOffset = 0;
|
||||||
while ( amtToRead > 0 ) {
|
int retries = 2;
|
||||||
int numRead = amtToRead;
|
while (amtToRead > 0) {
|
||||||
try {
|
int numRead = amtToRead;
|
||||||
blockReader.readFully(buf, readOffset, amtToRead);
|
try {
|
||||||
|
blockReader.readFully(buf, readOffset, amtToRead);
|
||||||
|
} catch (IOException e) {
|
||||||
|
retries--;
|
||||||
|
if (retries == 0)
|
||||||
|
throw new IOException("Could not read data from datanode");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
amtToRead -= numRead;
|
||||||
|
readOffset += numRead;
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
} finally {
|
||||||
retries--;
|
blockReader.close();
|
||||||
if (retries == 0)
|
|
||||||
throw new IOException("Could not read data from datanode");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
amtToRead -= numRead;
|
|
||||||
readOffset += numRead;
|
|
||||||
}
|
}
|
||||||
blockReader.close();
|
|
||||||
out.print(HtmlQuoting.quoteHtmlChars(new String(buf, Charsets.UTF_8)));
|
out.print(HtmlQuoting.quoteHtmlChars(new String(buf, Charsets.UTF_8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,8 @@
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -55,7 +57,8 @@ public class BlockMetadataHeader {
|
||||||
private short version;
|
private short version;
|
||||||
private DataChecksum checksum = null;
|
private DataChecksum checksum = null;
|
||||||
|
|
||||||
BlockMetadataHeader(short version, DataChecksum checksum) {
|
@VisibleForTesting
|
||||||
|
public BlockMetadataHeader(short version, DataChecksum checksum) {
|
||||||
this.checksum = checksum;
|
this.checksum = checksum;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
}
|
}
|
||||||
|
@ -148,7 +151,8 @@ private static BlockMetadataHeader readHeader(short version, DataInputStream in)
|
||||||
* @return
|
* @return
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private static void writeHeader(DataOutputStream out,
|
@VisibleForTesting
|
||||||
|
public static void writeHeader(DataOutputStream out,
|
||||||
BlockMetadataHeader header)
|
BlockMetadataHeader header)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
out.writeShort(header.getVersion());
|
out.writeShort(header.getVersion());
|
||||||
|
|
|
@ -2525,7 +2525,7 @@ public void clearAllBlockSecretKeys() {
|
||||||
/**
|
/**
|
||||||
* Get current value of the max balancer bandwidth in bytes per second.
|
* Get current value of the max balancer bandwidth in bytes per second.
|
||||||
*
|
*
|
||||||
* @return bandwidth Blanacer bandwidth in bytes per second for this datanode.
|
* @return Balancer bandwidth in bytes per second for this datanode.
|
||||||
*/
|
*/
|
||||||
public Long getBalancerBandwidth() {
|
public Long getBalancerBandwidth() {
|
||||||
DataXceiverServer dxcs =
|
DataXceiverServer dxcs =
|
||||||
|
|
|
@ -37,12 +37,12 @@
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang.builder.HashCodeBuilder;
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.fs.ChecksumException;
|
import org.apache.hadoop.fs.ChecksumException;
|
||||||
|
import org.apache.hadoop.hdfs.ExtendedBlockId;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -56,43 +56,6 @@
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
@InterfaceStability.Unstable
|
@InterfaceStability.Unstable
|
||||||
public class FsDatasetCache {
|
public class FsDatasetCache {
|
||||||
/**
|
|
||||||
* Keys which identify MappableBlocks.
|
|
||||||
*/
|
|
||||||
private static final class Key {
|
|
||||||
/**
|
|
||||||
* Block id.
|
|
||||||
*/
|
|
||||||
final long id;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Block pool id.
|
|
||||||
*/
|
|
||||||
final String bpid;
|
|
||||||
|
|
||||||
Key(long id, String bpid) {
|
|
||||||
this.id = id;
|
|
||||||
this.bpid = bpid;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (o == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!(o.getClass() == getClass())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
Key other = (Key)o;
|
|
||||||
return ((other.id == this.id) && (other.bpid.equals(this.bpid)));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return new HashCodeBuilder().append(id).append(bpid).hashCode();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MappableBlocks that we know about.
|
* MappableBlocks that we know about.
|
||||||
*/
|
*/
|
||||||
|
@ -143,7 +106,8 @@ public boolean shouldAdvertise() {
|
||||||
/**
|
/**
|
||||||
* Stores MappableBlock objects and the states they're in.
|
* Stores MappableBlock objects and the states they're in.
|
||||||
*/
|
*/
|
||||||
private final HashMap<Key, Value> mappableBlockMap = new HashMap<Key, Value>();
|
private final HashMap<ExtendedBlockId, Value> mappableBlockMap =
|
||||||
|
new HashMap<ExtendedBlockId, Value>();
|
||||||
|
|
||||||
private final AtomicLong numBlocksCached = new AtomicLong(0);
|
private final AtomicLong numBlocksCached = new AtomicLong(0);
|
||||||
|
|
||||||
|
@ -260,12 +224,12 @@ public FsDatasetCache(FsDatasetImpl dataset) {
|
||||||
*/
|
*/
|
||||||
synchronized List<Long> getCachedBlocks(String bpid) {
|
synchronized List<Long> getCachedBlocks(String bpid) {
|
||||||
List<Long> blocks = new ArrayList<Long>();
|
List<Long> blocks = new ArrayList<Long>();
|
||||||
for (Iterator<Entry<Key, Value>> iter =
|
for (Iterator<Entry<ExtendedBlockId, Value>> iter =
|
||||||
mappableBlockMap.entrySet().iterator(); iter.hasNext(); ) {
|
mappableBlockMap.entrySet().iterator(); iter.hasNext(); ) {
|
||||||
Entry<Key, Value> entry = iter.next();
|
Entry<ExtendedBlockId, Value> entry = iter.next();
|
||||||
if (entry.getKey().bpid.equals(bpid)) {
|
if (entry.getKey().getBlockPoolId().equals(bpid)) {
|
||||||
if (entry.getValue().state.shouldAdvertise()) {
|
if (entry.getValue().state.shouldAdvertise()) {
|
||||||
blocks.add(entry.getKey().id);
|
blocks.add(entry.getKey().getBlockId());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -278,7 +242,7 @@ synchronized List<Long> getCachedBlocks(String bpid) {
|
||||||
synchronized void cacheBlock(long blockId, String bpid,
|
synchronized void cacheBlock(long blockId, String bpid,
|
||||||
String blockFileName, long length, long genstamp,
|
String blockFileName, long length, long genstamp,
|
||||||
Executor volumeExecutor) {
|
Executor volumeExecutor) {
|
||||||
Key key = new Key(blockId, bpid);
|
ExtendedBlockId key = new ExtendedBlockId(blockId, bpid);
|
||||||
Value prevValue = mappableBlockMap.get(key);
|
Value prevValue = mappableBlockMap.get(key);
|
||||||
if (prevValue != null) {
|
if (prevValue != null) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
|
@ -299,7 +263,7 @@ synchronized void cacheBlock(long blockId, String bpid,
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void uncacheBlock(String bpid, long blockId) {
|
synchronized void uncacheBlock(String bpid, long blockId) {
|
||||||
Key key = new Key(blockId, bpid);
|
ExtendedBlockId key = new ExtendedBlockId(blockId, bpid);
|
||||||
Value prevValue = mappableBlockMap.get(key);
|
Value prevValue = mappableBlockMap.get(key);
|
||||||
|
|
||||||
if (prevValue == null) {
|
if (prevValue == null) {
|
||||||
|
@ -344,12 +308,12 @@ synchronized void uncacheBlock(String bpid, long blockId) {
|
||||||
* Background worker that mmaps, mlocks, and checksums a block
|
* Background worker that mmaps, mlocks, and checksums a block
|
||||||
*/
|
*/
|
||||||
private class CachingTask implements Runnable {
|
private class CachingTask implements Runnable {
|
||||||
private final Key key;
|
private final ExtendedBlockId key;
|
||||||
private final String blockFileName;
|
private final String blockFileName;
|
||||||
private final long length;
|
private final long length;
|
||||||
private final long genstamp;
|
private final long genstamp;
|
||||||
|
|
||||||
CachingTask(Key key, String blockFileName, long length, long genstamp) {
|
CachingTask(ExtendedBlockId key, String blockFileName, long length, long genstamp) {
|
||||||
this.key = key;
|
this.key = key;
|
||||||
this.blockFileName = blockFileName;
|
this.blockFileName = blockFileName;
|
||||||
this.length = length;
|
this.length = length;
|
||||||
|
@ -361,13 +325,13 @@ public void run() {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
FileInputStream blockIn = null, metaIn = null;
|
FileInputStream blockIn = null, metaIn = null;
|
||||||
MappableBlock mappableBlock = null;
|
MappableBlock mappableBlock = null;
|
||||||
ExtendedBlock extBlk =
|
ExtendedBlock extBlk = new ExtendedBlock(key.getBlockPoolId(),
|
||||||
new ExtendedBlock(key.bpid, key.id, length, genstamp);
|
key.getBlockId(), length, genstamp);
|
||||||
long newUsedBytes = usedBytesCount.reserve(length);
|
long newUsedBytes = usedBytesCount.reserve(length);
|
||||||
if (newUsedBytes < 0) {
|
if (newUsedBytes < 0) {
|
||||||
LOG.warn("Failed to cache block id " + key.id + ", pool " + key.bpid +
|
LOG.warn("Failed to cache " + key + ": could not reserve " + length +
|
||||||
": could not reserve " + length + " more bytes in the " +
|
" more bytes in the cache: " +
|
||||||
"cache: " + DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY +
|
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY +
|
||||||
" of " + maxBytes + " exceeded.");
|
" of " + maxBytes + " exceeded.");
|
||||||
numBlocksFailedToCache.incrementAndGet();
|
numBlocksFailedToCache.incrementAndGet();
|
||||||
return;
|
return;
|
||||||
|
@ -378,16 +342,15 @@ public void run() {
|
||||||
metaIn = (FileInputStream)dataset.getMetaDataInputStream(extBlk)
|
metaIn = (FileInputStream)dataset.getMetaDataInputStream(extBlk)
|
||||||
.getWrappedStream();
|
.getWrappedStream();
|
||||||
} catch (ClassCastException e) {
|
} catch (ClassCastException e) {
|
||||||
LOG.warn("Failed to cache block with id " + key.id + ", pool " +
|
LOG.warn("Failed to cache " + key +
|
||||||
key.bpid + ": Underlying blocks are not backed by files.", e);
|
": Underlying blocks are not backed by files.", e);
|
||||||
return;
|
return;
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
LOG.info("Failed to cache block with id " + key.id + ", pool " +
|
LOG.info("Failed to cache " + key + ": failed to find backing " +
|
||||||
key.bpid + ": failed to find backing files.");
|
"files.");
|
||||||
return;
|
return;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Failed to cache block with id " + key.id + ", pool " +
|
LOG.warn("Failed to cache " + key + ": failed to open file", e);
|
||||||
key.bpid + ": failed to open file", e);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
@ -395,11 +358,10 @@ public void run() {
|
||||||
load(length, blockIn, metaIn, blockFileName);
|
load(length, blockIn, metaIn, blockFileName);
|
||||||
} catch (ChecksumException e) {
|
} catch (ChecksumException e) {
|
||||||
// Exception message is bogus since this wasn't caused by a file read
|
// Exception message is bogus since this wasn't caused by a file read
|
||||||
LOG.warn("Failed to cache block " + key.id + " in " + key.bpid + ": " +
|
LOG.warn("Failed to cache " + key + ": checksum verification failed.");
|
||||||
"checksum verification failed.");
|
|
||||||
return;
|
return;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Failed to cache block " + key.id + " in " + key.bpid, e);
|
LOG.warn("Failed to cache " + key, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
synchronized (FsDatasetCache.this) {
|
synchronized (FsDatasetCache.this) {
|
||||||
|
@ -409,15 +371,14 @@ public void run() {
|
||||||
value.state == State.CACHING_CANCELLED);
|
value.state == State.CACHING_CANCELLED);
|
||||||
if (value.state == State.CACHING_CANCELLED) {
|
if (value.state == State.CACHING_CANCELLED) {
|
||||||
mappableBlockMap.remove(key);
|
mappableBlockMap.remove(key);
|
||||||
LOG.warn("Caching of block " + key.id + " in " + key.bpid +
|
LOG.warn("Caching of " + key + " was cancelled.");
|
||||||
" was cancelled.");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mappableBlockMap.put(key, new Value(mappableBlock, State.CACHED));
|
mappableBlockMap.put(key, new Value(mappableBlock, State.CACHED));
|
||||||
}
|
}
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Successfully cached block " + key.id + " in " + key.bpid +
|
LOG.debug("Successfully cached " + key + ". We are now caching " +
|
||||||
". We are now caching " + newUsedBytes + " bytes in total.");
|
newUsedBytes + " bytes in total.");
|
||||||
}
|
}
|
||||||
numBlocksCached.addAndGet(1);
|
numBlocksCached.addAndGet(1);
|
||||||
success = true;
|
success = true;
|
||||||
|
@ -425,9 +386,8 @@ public void run() {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
newUsedBytes = usedBytesCount.release(length);
|
newUsedBytes = usedBytesCount.release(length);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Caching of block " + key.id + " in " +
|
LOG.debug("Caching of " + key + " was aborted. We are now " +
|
||||||
key.bpid + " was aborted. We are now caching only " +
|
"caching only " + newUsedBytes + " + bytes in total.");
|
||||||
newUsedBytes + " + bytes in total.");
|
|
||||||
}
|
}
|
||||||
IOUtils.closeQuietly(blockIn);
|
IOUtils.closeQuietly(blockIn);
|
||||||
IOUtils.closeQuietly(metaIn);
|
IOUtils.closeQuietly(metaIn);
|
||||||
|
@ -445,9 +405,9 @@ public void run() {
|
||||||
}
|
}
|
||||||
|
|
||||||
private class UncachingTask implements Runnable {
|
private class UncachingTask implements Runnable {
|
||||||
private final Key key;
|
private final ExtendedBlockId key;
|
||||||
|
|
||||||
UncachingTask(Key key) {
|
UncachingTask(ExtendedBlockId key) {
|
||||||
this.key = key;
|
this.key = key;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -470,8 +430,8 @@ public void run() {
|
||||||
usedBytesCount.release(value.mappableBlock.getLength());
|
usedBytesCount.release(value.mappableBlock.getLength());
|
||||||
numBlocksCached.addAndGet(-1);
|
numBlocksCached.addAndGet(-1);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Uncaching of block " + key.id + " in " + key.bpid +
|
LOG.debug("Uncaching of " + key + " completed. " +
|
||||||
" completed. usedBytes = " + newUsedBytes);
|
"usedBytes = " + newUsedBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,8 +50,10 @@
|
||||||
import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
|
import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
|
||||||
import org.apache.hadoop.fs.CacheFlag;
|
import org.apache.hadoop.fs.CacheFlag;
|
||||||
import org.apache.hadoop.fs.InvalidRequestException;
|
import org.apache.hadoop.fs.InvalidRequestException;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.fs.permission.FsAction;
|
import org.apache.hadoop.fs.permission.FsAction;
|
||||||
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.protocol.CacheDirective;
|
import org.apache.hadoop.hdfs.protocol.CacheDirective;
|
||||||
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
|
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
|
||||||
|
@ -62,11 +64,15 @@
|
||||||
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
|
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
|
||||||
|
@ -81,6 +87,7 @@
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Cache Manager handles caching on DataNodes.
|
* The Cache Manager handles caching on DataNodes.
|
||||||
|
@ -167,6 +174,19 @@ public final class CacheManager {
|
||||||
*/
|
*/
|
||||||
private CacheReplicationMonitor monitor;
|
private CacheReplicationMonitor monitor;
|
||||||
|
|
||||||
|
public static final class PersistState {
|
||||||
|
public final CacheManagerSection section;
|
||||||
|
public final List<CachePoolInfoProto> pools;
|
||||||
|
public final List<CacheDirectiveInfoProto> directives;
|
||||||
|
|
||||||
|
public PersistState(CacheManagerSection section,
|
||||||
|
List<CachePoolInfoProto> pools, List<CacheDirectiveInfoProto> directives) {
|
||||||
|
this.section = section;
|
||||||
|
this.pools = pools;
|
||||||
|
this.directives = directives;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CacheManager(FSNamesystem namesystem, Configuration conf,
|
CacheManager(FSNamesystem namesystem, Configuration conf,
|
||||||
BlockManager blockManager) {
|
BlockManager blockManager) {
|
||||||
this.namesystem = namesystem;
|
this.namesystem = namesystem;
|
||||||
|
@ -944,6 +964,64 @@ public void saveStateCompat(DataOutputStream out, String sdPath)
|
||||||
serializerCompat.save(out, sdPath);
|
serializerCompat.save(out, sdPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public PersistState saveState() throws IOException {
|
||||||
|
ArrayList<CachePoolInfoProto> pools = Lists
|
||||||
|
.newArrayListWithCapacity(cachePools.size());
|
||||||
|
ArrayList<CacheDirectiveInfoProto> directives = Lists
|
||||||
|
.newArrayListWithCapacity(directivesById.size());
|
||||||
|
|
||||||
|
for (CachePool pool : cachePools.values()) {
|
||||||
|
CachePoolInfo p = pool.getInfo(true);
|
||||||
|
CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder()
|
||||||
|
.setPoolName(p.getPoolName());
|
||||||
|
|
||||||
|
if (p.getOwnerName() != null)
|
||||||
|
b.setOwnerName(p.getOwnerName());
|
||||||
|
|
||||||
|
if (p.getGroupName() != null)
|
||||||
|
b.setGroupName(p.getGroupName());
|
||||||
|
|
||||||
|
if (p.getMode() != null)
|
||||||
|
b.setMode(p.getMode().toShort());
|
||||||
|
|
||||||
|
if (p.getLimit() != null)
|
||||||
|
b.setLimit(p.getLimit());
|
||||||
|
|
||||||
|
pools.add(b.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CacheDirective directive : directivesById.values()) {
|
||||||
|
CacheDirectiveInfo info = directive.toInfo();
|
||||||
|
CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder()
|
||||||
|
.setId(info.getId());
|
||||||
|
|
||||||
|
if (info.getPath() != null) {
|
||||||
|
b.setPath(info.getPath().toUri().getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.getReplication() != null) {
|
||||||
|
b.setReplication(info.getReplication());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.getPool() != null) {
|
||||||
|
b.setPool(info.getPool());
|
||||||
|
}
|
||||||
|
|
||||||
|
Expiration expiry = info.getExpiration();
|
||||||
|
if (expiry != null) {
|
||||||
|
assert (!expiry.isRelative());
|
||||||
|
b.setExpiration(PBHelper.convert(expiry));
|
||||||
|
}
|
||||||
|
|
||||||
|
directives.add(b.build());
|
||||||
|
}
|
||||||
|
CacheManagerSection s = CacheManagerSection.newBuilder()
|
||||||
|
.setNextDirectiveId(nextDirectiveId).setNumPools(pools.size())
|
||||||
|
.setNumDirectives(directives.size()).build();
|
||||||
|
|
||||||
|
return new PersistState(s, pools, directives);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reloads CacheManager state from the passed DataInput. Used during namenode
|
* Reloads CacheManager state from the passed DataInput. Used during namenode
|
||||||
* startup to restore CacheManager state from an FSImage.
|
* startup to restore CacheManager state from an FSImage.
|
||||||
|
@ -954,6 +1032,56 @@ public void loadStateCompat(DataInput in) throws IOException {
|
||||||
serializerCompat.load(in);
|
serializerCompat.load(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void loadState(PersistState s) throws IOException {
|
||||||
|
nextDirectiveId = s.section.getNextDirectiveId();
|
||||||
|
for (CachePoolInfoProto p : s.pools) {
|
||||||
|
CachePoolInfo info = new CachePoolInfo(p.getPoolName());
|
||||||
|
if (p.hasOwnerName())
|
||||||
|
info.setOwnerName(p.getOwnerName());
|
||||||
|
|
||||||
|
if (p.hasGroupName())
|
||||||
|
info.setGroupName(p.getGroupName());
|
||||||
|
|
||||||
|
if (p.hasMode())
|
||||||
|
info.setMode(new FsPermission((short) p.getMode()));
|
||||||
|
|
||||||
|
if (p.hasLimit())
|
||||||
|
info.setLimit(p.getLimit());
|
||||||
|
|
||||||
|
addCachePool(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (CacheDirectiveInfoProto p : s.directives) {
|
||||||
|
// Get pool reference by looking it up in the map
|
||||||
|
final String poolName = p.getPool();
|
||||||
|
CacheDirective directive = new CacheDirective(p.getId(), new Path(
|
||||||
|
p.getPath()).toUri().getPath(), (short) p.getReplication(), p
|
||||||
|
.getExpiration().getMillis());
|
||||||
|
addCacheDirective(poolName, directive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addCacheDirective(final String poolName,
|
||||||
|
final CacheDirective directive) throws IOException {
|
||||||
|
CachePool pool = cachePools.get(poolName);
|
||||||
|
if (pool == null) {
|
||||||
|
throw new IOException("Directive refers to pool " + poolName
|
||||||
|
+ ", which does not exist.");
|
||||||
|
}
|
||||||
|
boolean addedDirective = pool.getDirectiveList().add(directive);
|
||||||
|
assert addedDirective;
|
||||||
|
if (directivesById.put(directive.getId(), directive) != null) {
|
||||||
|
throw new IOException("A directive with ID " + directive.getId()
|
||||||
|
+ " already exists");
|
||||||
|
}
|
||||||
|
List<CacheDirective> directives = directivesByPath.get(directive.getPath());
|
||||||
|
if (directives == null) {
|
||||||
|
directives = new LinkedList<CacheDirective>();
|
||||||
|
directivesByPath.put(directive.getPath(), directives);
|
||||||
|
}
|
||||||
|
directives.add(directive);
|
||||||
|
}
|
||||||
|
|
||||||
private final class SerializerCompat {
|
private final class SerializerCompat {
|
||||||
private void save(DataOutputStream out, String sdPath) throws IOException {
|
private void save(DataOutputStream out, String sdPath) throws IOException {
|
||||||
out.writeLong(nextDirectiveId);
|
out.writeLong(nextDirectiveId);
|
||||||
|
@ -1036,27 +1164,10 @@ private void loadDirectives(DataInput in) throws IOException {
|
||||||
CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
|
CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
|
||||||
// Get pool reference by looking it up in the map
|
// Get pool reference by looking it up in the map
|
||||||
final String poolName = info.getPool();
|
final String poolName = info.getPool();
|
||||||
CachePool pool = cachePools.get(poolName);
|
|
||||||
if (pool == null) {
|
|
||||||
throw new IOException("Directive refers to pool " + poolName +
|
|
||||||
", which does not exist.");
|
|
||||||
}
|
|
||||||
CacheDirective directive =
|
CacheDirective directive =
|
||||||
new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
|
new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
|
||||||
info.getReplication(), info.getExpiration().getAbsoluteMillis());
|
info.getReplication(), info.getExpiration().getAbsoluteMillis());
|
||||||
boolean addedDirective = pool.getDirectiveList().add(directive);
|
addCacheDirective(poolName, directive);
|
||||||
assert addedDirective;
|
|
||||||
if (directivesById.put(directive.getId(), directive) != null) {
|
|
||||||
throw new IOException("A directive with ID " + directive.getId() +
|
|
||||||
" already exists");
|
|
||||||
}
|
|
||||||
List<CacheDirective> directives =
|
|
||||||
directivesByPath.get(directive.getPath());
|
|
||||||
if (directives == null) {
|
|
||||||
directives = new LinkedList<CacheDirective>();
|
|
||||||
directivesByPath.put(directive.getPath(), directives);
|
|
||||||
}
|
|
||||||
directives.add(directive);
|
|
||||||
counter.increment();
|
counter.increment();
|
||||||
}
|
}
|
||||||
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
||||||
|
|
|
@ -587,6 +587,8 @@ public void toXML(XMLOutputter doc) throws IOException {
|
||||||
toXmlItemBlockWithLink(doc, nn.host, nn.httpAddress, "NameNode");
|
toXmlItemBlockWithLink(doc, nn.host, nn.httpAddress, "NameNode");
|
||||||
toXmlItemBlock(doc, "Blockpool Used",
|
toXmlItemBlock(doc, "Blockpool Used",
|
||||||
StringUtils.byteDesc(nn.bpUsed));
|
StringUtils.byteDesc(nn.bpUsed));
|
||||||
|
toXmlItemBlock(doc, "Blockpool Used%",
|
||||||
|
DFSUtil.percent2String(DFSUtil.getPercentUsed(nn.bpUsed, total)));
|
||||||
toXmlItemBlock(doc, "Files And Directories",
|
toXmlItemBlock(doc, "Files And Directories",
|
||||||
Long.toString(nn.filesAndDirectories));
|
Long.toString(nn.filesAndDirectories));
|
||||||
toXmlItemBlock(doc, "Blocks", Long.toString(nn.blocksCount));
|
toXmlItemBlock(doc, "Blocks", Long.toString(nn.blocksCount));
|
||||||
|
|
|
@ -42,6 +42,12 @@ public interface FSClusterStats {
|
||||||
* for writing targets, and false otherwise.
|
* for writing targets, and false otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean isAvoidingStaleDataNodesForWrite();
|
public boolean isAvoidingStaleDataNodesForWrite();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates number of datanodes that are in service.
|
||||||
|
* @return Number of datanodes that are both alive and not decommissioned.
|
||||||
|
*/
|
||||||
|
public int getNumDatanodesInService();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -813,8 +813,7 @@ private void loadFSImage(File imageFile, FSNamesystem target,
|
||||||
*/
|
*/
|
||||||
private void loadFSImage(File curFile, MD5Hash expectedMd5,
|
private void loadFSImage(File curFile, MD5Hash expectedMd5,
|
||||||
FSNamesystem target, MetaRecoveryContext recovery) throws IOException {
|
FSNamesystem target, MetaRecoveryContext recovery) throws IOException {
|
||||||
FSImageFormat.Loader loader = new FSImageFormat.Loader(
|
FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target);
|
||||||
conf, target);
|
|
||||||
loader.load(curFile);
|
loader.load(curFile);
|
||||||
target.setBlockPoolId(this.getBlockPoolID());
|
target.setBlockPoolId(this.getBlockPoolID());
|
||||||
|
|
||||||
|
@ -843,7 +842,7 @@ void saveFSImage(SaveNamespaceContext context, StorageDirectory sd)
|
||||||
File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid);
|
File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid);
|
||||||
File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid);
|
File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid);
|
||||||
|
|
||||||
FSImageFormat.Saver saver = new FSImageFormat.Saver(context);
|
FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context);
|
||||||
FSImageCompression compression = FSImageCompression.createCompression(conf);
|
FSImageCompression compression = FSImageCompression.createCompression(conf);
|
||||||
saver.save(newFile, compression);
|
saver.save(newFile, compression);
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,10 @@ private FSImageCompression(CompressionCodec codec) {
|
||||||
imageCodec = codec;
|
imageCodec = codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CompressionCodec getImageCodec() {
|
||||||
|
return imageCodec;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a "noop" compression - i.e. uncompressed
|
* Create a "noop" compression - i.e. uncompressed
|
||||||
*/
|
*/
|
||||||
|
@ -89,7 +93,7 @@ static FSImageCompression createCompression(Configuration conf)
|
||||||
* Create a compression instance using the codec specified by
|
* Create a compression instance using the codec specified by
|
||||||
* <code>codecClassName</code>
|
* <code>codecClassName</code>
|
||||||
*/
|
*/
|
||||||
private static FSImageCompression createCompression(Configuration conf,
|
static FSImageCompression createCompression(Configuration conf,
|
||||||
String codecClassName)
|
String codecClassName)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
|
|
|
@ -68,12 +68,13 @@
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
|
||||||
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.io.MD5Hash;
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains inner classes for reading or writing the on-disk format for
|
* Contains inner classes for reading or writing the on-disk format for
|
||||||
|
@ -184,12 +185,70 @@ public class FSImageFormat {
|
||||||
// Static-only class
|
// Static-only class
|
||||||
private FSImageFormat() {}
|
private FSImageFormat() {}
|
||||||
|
|
||||||
|
interface AbstractLoader {
|
||||||
|
MD5Hash getLoadedImageMd5();
|
||||||
|
long getLoadedImageTxId();
|
||||||
|
}
|
||||||
|
|
||||||
|
static class LoaderDelegator implements AbstractLoader {
|
||||||
|
private AbstractLoader impl;
|
||||||
|
private final Configuration conf;
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
|
||||||
|
LoaderDelegator(Configuration conf, FSNamesystem fsn) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.fsn = fsn;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MD5Hash getLoadedImageMd5() {
|
||||||
|
return impl.getLoadedImageMd5();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLoadedImageTxId() {
|
||||||
|
return impl.getLoadedImageTxId();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void load(File file) throws IOException {
|
||||||
|
Preconditions.checkState(impl == null, "Image already loaded!");
|
||||||
|
|
||||||
|
FileInputStream is = null;
|
||||||
|
try {
|
||||||
|
is = new FileInputStream(file);
|
||||||
|
byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length];
|
||||||
|
IOUtils.readFully(is, magic, 0, magic.length);
|
||||||
|
if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
|
||||||
|
FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
|
||||||
|
conf, fsn);
|
||||||
|
impl = loader;
|
||||||
|
loader.load(file);
|
||||||
|
} else {
|
||||||
|
Loader loader = new Loader(conf, fsn);
|
||||||
|
impl = loader;
|
||||||
|
loader.load(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(LOG, is);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a loader class to load the image. It chooses the loader based on
|
||||||
|
* the layout version.
|
||||||
|
*/
|
||||||
|
public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) {
|
||||||
|
return new LoaderDelegator(conf, fsn);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A one-shot class responsible for loading an image. The load() function
|
* A one-shot class responsible for loading an image. The load() function
|
||||||
* should be called once, after which the getter methods may be used to retrieve
|
* should be called once, after which the getter methods may be used to retrieve
|
||||||
* information about the image that was loaded, if loading was successful.
|
* information about the image that was loaded, if loading was successful.
|
||||||
*/
|
*/
|
||||||
public static class Loader {
|
public static class Loader implements AbstractLoader {
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
/** which namesystem this loader is working for */
|
/** which namesystem this loader is working for */
|
||||||
private final FSNamesystem namesystem;
|
private final FSNamesystem namesystem;
|
||||||
|
@ -214,12 +273,14 @@ public static class Loader {
|
||||||
* Return the MD5 checksum of the image that has been loaded.
|
* Return the MD5 checksum of the image that has been loaded.
|
||||||
* @throws IllegalStateException if load() has not yet been called.
|
* @throws IllegalStateException if load() has not yet been called.
|
||||||
*/
|
*/
|
||||||
MD5Hash getLoadedImageMd5() {
|
@Override
|
||||||
|
public MD5Hash getLoadedImageMd5() {
|
||||||
checkLoaded();
|
checkLoaded();
|
||||||
return imgDigest;
|
return imgDigest;
|
||||||
}
|
}
|
||||||
|
|
||||||
long getLoadedImageTxId() {
|
@Override
|
||||||
|
public long getLoadedImageTxId() {
|
||||||
checkLoaded();
|
checkLoaded();
|
||||||
return imgTxId;
|
return imgTxId;
|
||||||
}
|
}
|
||||||
|
@ -242,7 +303,7 @@ private void checkNotLoaded() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void load(File curFile) throws IOException {
|
public void load(File curFile) throws IOException {
|
||||||
checkNotLoaded();
|
checkNotLoaded();
|
||||||
assert curFile != null : "curFile is null";
|
assert curFile != null : "curFile is null";
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,426 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.HadoopIllegalArgumentException;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
|
import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||||
|
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.protobuf.ByteString;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public final class FSImageFormatPBINode {
|
||||||
|
private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1;
|
||||||
|
private final static int USER_STRID_OFFSET = 40;
|
||||||
|
private final static int GROUP_STRID_OFFSET = 16;
|
||||||
|
private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
|
||||||
|
|
||||||
|
public final static class Loader {
|
||||||
|
public static PermissionStatus loadPermission(long id,
|
||||||
|
final String[] stringTable) {
|
||||||
|
short perm = (short) (id & ((1 << GROUP_STRID_OFFSET) - 1));
|
||||||
|
int gsid = (int) ((id >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK);
|
||||||
|
int usid = (int) ((id >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK);
|
||||||
|
return new PermissionStatus(stringTable[usid], stringTable[gsid],
|
||||||
|
new FsPermission(perm));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static INodeDirectory loadINodeDirectory(INodeSection.INode n,
|
||||||
|
final String[] stringTable) {
|
||||||
|
assert n.getType() == INodeSection.INode.Type.DIRECTORY;
|
||||||
|
INodeSection.INodeDirectory d = n.getDirectory();
|
||||||
|
|
||||||
|
final PermissionStatus permissions = loadPermission(d.getPermission(),
|
||||||
|
stringTable);
|
||||||
|
final INodeDirectory dir = new INodeDirectory(n.getId(), n.getName()
|
||||||
|
.toByteArray(), permissions, d.getModificationTime());
|
||||||
|
|
||||||
|
final long nsQuota = d.getNsQuota(), dsQuota = d.getDsQuota();
|
||||||
|
if (nsQuota >= 0 || dsQuota >= 0) {
|
||||||
|
dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota);
|
||||||
|
}
|
||||||
|
return dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void updateBlocksMap(INodeFile file, BlockManager bm) {
|
||||||
|
// Add file->block mapping
|
||||||
|
final BlockInfo[] blocks = file.getBlocks();
|
||||||
|
if (blocks != null) {
|
||||||
|
for (int i = 0; i < blocks.length; i++) {
|
||||||
|
file.setBlock(i, bm.addBlockCollection(blocks[i], file));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final FSDirectory dir;
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
private final FSImageFormatProtobuf.Loader parent;
|
||||||
|
|
||||||
|
Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) {
|
||||||
|
this.fsn = fsn;
|
||||||
|
this.dir = fsn.dir;
|
||||||
|
this.parent = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
void loadINodeDirectorySection(InputStream in) throws IOException {
|
||||||
|
final List<INodeReference> refList = parent.getLoaderContext()
|
||||||
|
.getRefList();
|
||||||
|
while (true) {
|
||||||
|
INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
// note that in is a LimitedInputStream
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
INodeDirectory p = dir.getInode(e.getParent()).asDirectory();
|
||||||
|
for (long id : e.getChildrenList()) {
|
||||||
|
INode child = dir.getInode(id);
|
||||||
|
addToParent(p, child);
|
||||||
|
}
|
||||||
|
for (int refId : e.getRefChildrenList()) {
|
||||||
|
INodeReference ref = refList.get(refId);
|
||||||
|
addToParent(p, ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void loadINodeSection(InputStream in) throws IOException {
|
||||||
|
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||||
|
fsn.resetLastInodeId(s.getLastInodeId());
|
||||||
|
LOG.info("Loading " + s.getNumInodes() + " INodes.");
|
||||||
|
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||||
|
INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
|
||||||
|
if (p.getId() == INodeId.ROOT_INODE_ID) {
|
||||||
|
loadRootINode(p);
|
||||||
|
} else {
|
||||||
|
INode n = loadINode(p);
|
||||||
|
dir.addToInodeMap(n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the under-construction files section, and update the lease map
|
||||||
|
*/
|
||||||
|
void loadFilesUnderConstructionSection(InputStream in) throws IOException {
|
||||||
|
while (true) {
|
||||||
|
FileUnderConstructionEntry entry = FileUnderConstructionEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
if (entry == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// update the lease manager
|
||||||
|
INodeFile file = dir.getInode(entry.getInodeId()).asFile();
|
||||||
|
FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature();
|
||||||
|
Preconditions.checkState(uc != null); // file must be under-construction
|
||||||
|
fsn.leaseManager.addLease(uc.getClientName(), entry.getFullPath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToParent(INodeDirectory parent, INode child) {
|
||||||
|
if (parent == dir.rootDir && FSDirectory.isReservedName(child)) {
|
||||||
|
throw new HadoopIllegalArgumentException("File name \""
|
||||||
|
+ child.getLocalName() + "\" is reserved. Please "
|
||||||
|
+ " change the name of the existing file or directory to another "
|
||||||
|
+ "name before upgrading to this release.");
|
||||||
|
}
|
||||||
|
// NOTE: This does not update space counts for parents
|
||||||
|
if (!parent.addChild(child)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
dir.cacheName(child);
|
||||||
|
|
||||||
|
if (child.isFile()) {
|
||||||
|
updateBlocksMap(child.asFile(), fsn.getBlockManager());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private INode loadINode(INodeSection.INode n) {
|
||||||
|
switch (n.getType()) {
|
||||||
|
case FILE:
|
||||||
|
return loadINodeFile(n);
|
||||||
|
case DIRECTORY:
|
||||||
|
return loadINodeDirectory(n, parent.getLoaderContext().getStringTable());
|
||||||
|
case SYMLINK:
|
||||||
|
return loadINodeSymlink(n);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private INodeFile loadINodeFile(INodeSection.INode n) {
|
||||||
|
assert n.getType() == INodeSection.INode.Type.FILE;
|
||||||
|
INodeSection.INodeFile f = n.getFile();
|
||||||
|
List<BlockProto> bp = f.getBlocksList();
|
||||||
|
short replication = (short) f.getReplication();
|
||||||
|
|
||||||
|
BlockInfo[] blocks = new BlockInfo[bp.size()];
|
||||||
|
for (int i = 0, e = bp.size(); i < e; ++i) {
|
||||||
|
blocks[i] = new BlockInfo(PBHelper.convert(bp.get(i)), replication);
|
||||||
|
}
|
||||||
|
final PermissionStatus permissions = loadPermission(f.getPermission(),
|
||||||
|
parent.getLoaderContext().getStringTable());
|
||||||
|
|
||||||
|
final INodeFile file = new INodeFile(n.getId(),
|
||||||
|
n.getName().toByteArray(), permissions, f.getModificationTime(),
|
||||||
|
f.getAccessTime(), blocks, replication, f.getPreferredBlockSize());
|
||||||
|
// under-construction information
|
||||||
|
if (f.hasFileUC()) {
|
||||||
|
INodeSection.FileUnderConstructionFeature uc = f.getFileUC();
|
||||||
|
file.toUnderConstruction(uc.getClientName(), uc.getClientMachine(),
|
||||||
|
null);
|
||||||
|
if (blocks.length > 0) {
|
||||||
|
BlockInfo lastBlk = file.getLastBlock();
|
||||||
|
// replace the last block of file
|
||||||
|
file.setBlock(file.numBlocks() - 1, new BlockInfoUnderConstruction(
|
||||||
|
lastBlk, replication));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private INodeSymlink loadINodeSymlink(INodeSection.INode n) {
|
||||||
|
assert n.getType() == INodeSection.INode.Type.SYMLINK;
|
||||||
|
INodeSection.INodeSymlink s = n.getSymlink();
|
||||||
|
final PermissionStatus permissions = loadPermission(s.getPermission(),
|
||||||
|
parent.getLoaderContext().getStringTable());
|
||||||
|
return new INodeSymlink(n.getId(), n.getName().toByteArray(), permissions,
|
||||||
|
0, 0, s.getTarget().toStringUtf8());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadRootINode(INodeSection.INode p) {
|
||||||
|
INodeDirectory root = loadINodeDirectory(p, parent.getLoaderContext()
|
||||||
|
.getStringTable());
|
||||||
|
final Quota.Counts q = root.getQuotaCounts();
|
||||||
|
final long nsQuota = q.get(Quota.NAMESPACE);
|
||||||
|
final long dsQuota = q.get(Quota.DISKSPACE);
|
||||||
|
if (nsQuota != -1 || dsQuota != -1) {
|
||||||
|
dir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota);
|
||||||
|
}
|
||||||
|
dir.rootDir.cloneModificationTime(root);
|
||||||
|
dir.rootDir.clonePermissionStatus(root);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public final static class Saver {
|
||||||
|
private static long buildPermissionStatus(INodeAttributes n,
|
||||||
|
final SaverContext.DeduplicationMap<String> stringMap) {
|
||||||
|
long userId = stringMap.getId(n.getUserName());
|
||||||
|
long groupId = stringMap.getId(n.getGroupName());
|
||||||
|
return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
|
||||||
|
| ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
|
||||||
|
| n.getFsPermissionShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static INodeSection.INodeFile.Builder buildINodeFile(
|
||||||
|
INodeFileAttributes file,
|
||||||
|
final SaverContext.DeduplicationMap<String> stringMap) {
|
||||||
|
INodeSection.INodeFile.Builder b = INodeSection.INodeFile.newBuilder()
|
||||||
|
.setAccessTime(file.getAccessTime())
|
||||||
|
.setModificationTime(file.getModificationTime())
|
||||||
|
.setPermission(buildPermissionStatus(file, stringMap))
|
||||||
|
.setPreferredBlockSize(file.getPreferredBlockSize())
|
||||||
|
.setReplication(file.getFileReplication());
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static INodeSection.INodeDirectory.Builder buildINodeDirectory(
|
||||||
|
INodeDirectoryAttributes dir,
|
||||||
|
final SaverContext.DeduplicationMap<String> stringMap) {
|
||||||
|
Quota.Counts quota = dir.getQuotaCounts();
|
||||||
|
INodeSection.INodeDirectory.Builder b = INodeSection.INodeDirectory
|
||||||
|
.newBuilder().setModificationTime(dir.getModificationTime())
|
||||||
|
.setNsQuota(quota.get(Quota.NAMESPACE))
|
||||||
|
.setDsQuota(quota.get(Quota.DISKSPACE))
|
||||||
|
.setPermission(buildPermissionStatus(dir, stringMap));
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
private final FileSummary.Builder summary;
|
||||||
|
private final SaveNamespaceContext context;
|
||||||
|
private final FSImageFormatProtobuf.Saver parent;
|
||||||
|
|
||||||
|
Saver(FSImageFormatProtobuf.Saver parent, FileSummary.Builder summary) {
|
||||||
|
this.parent = parent;
|
||||||
|
this.summary = summary;
|
||||||
|
this.context = parent.getContext();
|
||||||
|
this.fsn = context.getSourceNamesystem();
|
||||||
|
}
|
||||||
|
|
||||||
|
void serializeINodeDirectorySection(OutputStream out) throws IOException {
|
||||||
|
Iterator<INodeWithAdditionalFields> iter = fsn.getFSDirectory()
|
||||||
|
.getINodeMap().getMapIterator();
|
||||||
|
final ArrayList<INodeReference> refList = parent.getSaverContext()
|
||||||
|
.getRefList();
|
||||||
|
int i = 0;
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
INodeWithAdditionalFields n = iter.next();
|
||||||
|
if (!n.isDirectory()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ReadOnlyList<INode> children = n.asDirectory().getChildrenList(
|
||||||
|
Snapshot.CURRENT_STATE_ID);
|
||||||
|
if (children.size() > 0) {
|
||||||
|
INodeDirectorySection.DirEntry.Builder b = INodeDirectorySection.
|
||||||
|
DirEntry.newBuilder().setParent(n.getId());
|
||||||
|
for (INode inode : children) {
|
||||||
|
if (!inode.isReference()) {
|
||||||
|
b.addChildren(inode.getId());
|
||||||
|
} else {
|
||||||
|
refList.add(inode.asReference());
|
||||||
|
b.addRefChildren(refList.size() - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
INodeDirectorySection.DirEntry e = b.build();
|
||||||
|
e.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
++i;
|
||||||
|
if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) {
|
||||||
|
context.checkCancelled();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parent.commitSection(summary,
|
||||||
|
FSImageFormatProtobuf.SectionName.INODE_DIR);
|
||||||
|
}
|
||||||
|
|
||||||
|
void serializeINodeSection(OutputStream out) throws IOException {
|
||||||
|
INodeMap inodesMap = fsn.dir.getINodeMap();
|
||||||
|
|
||||||
|
INodeSection.Builder b = INodeSection.newBuilder()
|
||||||
|
.setLastInodeId(fsn.getLastInodeId()).setNumInodes(inodesMap.size());
|
||||||
|
INodeSection s = b.build();
|
||||||
|
s.writeDelimitedTo(out);
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
Iterator<INodeWithAdditionalFields> iter = inodesMap.getMapIterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
INodeWithAdditionalFields n = iter.next();
|
||||||
|
save(out, n);
|
||||||
|
++i;
|
||||||
|
if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) {
|
||||||
|
context.checkCancelled();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parent.commitSection(summary, FSImageFormatProtobuf.SectionName.INODE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void serializeFilesUCSection(OutputStream out) throws IOException {
|
||||||
|
Map<String, INodeFile> ucMap = fsn.getFilesUnderConstruction();
|
||||||
|
for (Map.Entry<String, INodeFile> entry : ucMap.entrySet()) {
|
||||||
|
String path = entry.getKey();
|
||||||
|
INodeFile file = entry.getValue();
|
||||||
|
FileUnderConstructionEntry.Builder b = FileUnderConstructionEntry
|
||||||
|
.newBuilder().setInodeId(file.getId()).setFullPath(path);
|
||||||
|
FileUnderConstructionEntry e = b.build();
|
||||||
|
e.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
parent.commitSection(summary,
|
||||||
|
FSImageFormatProtobuf.SectionName.FILES_UNDERCONSTRUCTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void save(OutputStream out, INode n) throws IOException {
|
||||||
|
if (n.isDirectory()) {
|
||||||
|
save(out, n.asDirectory());
|
||||||
|
} else if (n.isFile()) {
|
||||||
|
save(out, n.asFile());
|
||||||
|
} else if (n.isSymlink()) {
|
||||||
|
save(out, n.asSymlink());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void save(OutputStream out, INodeDirectory n) throws IOException {
|
||||||
|
INodeSection.INodeDirectory.Builder b = buildINodeDirectory(n,
|
||||||
|
parent.getSaverContext().getStringMap());
|
||||||
|
INodeSection.INode r = buildINodeCommon(n)
|
||||||
|
.setType(INodeSection.INode.Type.DIRECTORY).setDirectory(b).build();
|
||||||
|
r.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void save(OutputStream out, INodeFile n) throws IOException {
|
||||||
|
INodeSection.INodeFile.Builder b = buildINodeFile(n,
|
||||||
|
parent.getSaverContext().getStringMap());
|
||||||
|
|
||||||
|
for (Block block : n.getBlocks()) {
|
||||||
|
b.addBlocks(PBHelper.convert(block));
|
||||||
|
}
|
||||||
|
|
||||||
|
FileUnderConstructionFeature uc = n.getFileUnderConstructionFeature();
|
||||||
|
if (uc != null) {
|
||||||
|
INodeSection.FileUnderConstructionFeature f =
|
||||||
|
INodeSection.FileUnderConstructionFeature
|
||||||
|
.newBuilder().setClientName(uc.getClientName())
|
||||||
|
.setClientMachine(uc.getClientMachine()).build();
|
||||||
|
b.setFileUC(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
INodeSection.INode r = buildINodeCommon(n)
|
||||||
|
.setType(INodeSection.INode.Type.FILE).setFile(b).build();
|
||||||
|
r.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void save(OutputStream out, INodeSymlink n) throws IOException {
|
||||||
|
INodeSection.INodeSymlink.Builder b = INodeSection.INodeSymlink
|
||||||
|
.newBuilder()
|
||||||
|
.setPermission(buildPermissionStatus(n, parent.getSaverContext().getStringMap()))
|
||||||
|
.setTarget(ByteString.copyFrom(n.getSymlink()));
|
||||||
|
INodeSection.INode r = buildINodeCommon(n)
|
||||||
|
.setType(INodeSection.INode.Type.SYMLINK).setSymlink(b).build();
|
||||||
|
r.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final INodeSection.INode.Builder buildINodeCommon(INode n) {
|
||||||
|
return INodeSection.INode.newBuilder()
|
||||||
|
.setId(n.getId())
|
||||||
|
.setName(ByteString.copyFrom(n.getLocalNameBytes()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private FSImageFormatPBINode() {
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,583 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.security.DigestOutputStream;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
|
||||||
|
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
||||||
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
|
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||||
|
import org.apache.hadoop.io.compress.CompressorStream;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.common.io.LimitInputStream;
|
||||||
|
import com.google.protobuf.CodedOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class to read / write fsimage in protobuf format.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public final class FSImageFormatProtobuf {
|
||||||
|
private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
|
||||||
|
|
||||||
|
public static final class LoaderContext {
|
||||||
|
private String[] stringTable;
|
||||||
|
private final ArrayList<INodeReference> refList = Lists.newArrayList();
|
||||||
|
|
||||||
|
public String[] getStringTable() {
|
||||||
|
return stringTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<INodeReference> getRefList() {
|
||||||
|
return refList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class SaverContext {
|
||||||
|
public static class DeduplicationMap<E> {
|
||||||
|
private final Map<E, Integer> map = Maps.newHashMap();
|
||||||
|
private DeduplicationMap() {}
|
||||||
|
|
||||||
|
static <T> DeduplicationMap<T> newMap() {
|
||||||
|
return new DeduplicationMap<T>();
|
||||||
|
}
|
||||||
|
|
||||||
|
int getId(E value) {
|
||||||
|
if (value == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
Integer v = map.get(value);
|
||||||
|
if (v == null) {
|
||||||
|
int nv = map.size() + 1;
|
||||||
|
map.put(value, nv);
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
int size() {
|
||||||
|
return map.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<Entry<E, Integer>> entrySet() {
|
||||||
|
return map.entrySet();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private final DeduplicationMap<String> stringMap = DeduplicationMap.newMap();
|
||||||
|
private final ArrayList<INodeReference> refList = Lists.newArrayList();
|
||||||
|
|
||||||
|
public DeduplicationMap<String> getStringMap() {
|
||||||
|
return stringMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<INodeReference> getRefList() {
|
||||||
|
return refList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class Loader implements FSImageFormat.AbstractLoader {
|
||||||
|
static final int MINIMUM_FILE_LENGTH = 8;
|
||||||
|
private final Configuration conf;
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
private final LoaderContext ctx;
|
||||||
|
/** The MD5 sum of the loaded file */
|
||||||
|
private MD5Hash imgDigest;
|
||||||
|
/** The transaction ID of the last edit represented by the loaded file */
|
||||||
|
private long imgTxId;
|
||||||
|
|
||||||
|
Loader(Configuration conf, FSNamesystem fsn) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.fsn = fsn;
|
||||||
|
this.ctx = new LoaderContext();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MD5Hash getLoadedImageMd5() {
|
||||||
|
return imgDigest;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLoadedImageTxId() {
|
||||||
|
return imgTxId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LoaderContext getLoaderContext() {
|
||||||
|
return ctx;
|
||||||
|
}
|
||||||
|
|
||||||
|
void load(File file) throws IOException {
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
imgDigest = MD5FileUtils.computeMd5ForFile(file);
|
||||||
|
RandomAccessFile raFile = new RandomAccessFile(file, "r");
|
||||||
|
FileInputStream fin = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
loadInternal(raFile, fin);
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
LOG.info("Loaded FSImage in " + (end - start) / 1000 + " seconds.");
|
||||||
|
} finally {
|
||||||
|
fin.close();
|
||||||
|
raFile.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadInternal(RandomAccessFile raFile, FileInputStream fin)
|
||||||
|
throws IOException {
|
||||||
|
if (!FSImageUtil.checkFileFormat(raFile)) {
|
||||||
|
throw new IOException("Unrecognized file format");
|
||||||
|
}
|
||||||
|
FileSummary summary = FSImageUtil.loadSummary(raFile);
|
||||||
|
|
||||||
|
FileChannel channel = fin.getChannel();
|
||||||
|
|
||||||
|
FSImageFormatPBINode.Loader inodeLoader = new FSImageFormatPBINode.Loader(
|
||||||
|
fsn, this);
|
||||||
|
FSImageFormatPBSnapshot.Loader snapshotLoader = new FSImageFormatPBSnapshot.Loader(
|
||||||
|
fsn, this);
|
||||||
|
|
||||||
|
ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
|
||||||
|
.getSectionsList());
|
||||||
|
Collections.sort(sections, new Comparator<FileSummary.Section>() {
|
||||||
|
@Override
|
||||||
|
public int compare(FileSummary.Section s1, FileSummary.Section s2) {
|
||||||
|
SectionName n1 = SectionName.fromString(s1.getName());
|
||||||
|
SectionName n2 = SectionName.fromString(s2.getName());
|
||||||
|
if (n1 == null) {
|
||||||
|
return n2 == null ? 0 : -1;
|
||||||
|
} else if (n2 == null) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return n1.ordinal() - n2.ordinal();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
StartupProgress prog = NameNode.getStartupProgress();
|
||||||
|
/**
|
||||||
|
* beginStep() and the endStep() calls do not match the boundary of the
|
||||||
|
* sections. This is because that the current implementation only allows
|
||||||
|
* a particular step to be started for once.
|
||||||
|
*/
|
||||||
|
Step currentStep = null;
|
||||||
|
|
||||||
|
for (FileSummary.Section s : sections) {
|
||||||
|
channel.position(s.getOffset());
|
||||||
|
InputStream in = new BufferedInputStream(new LimitInputStream(fin,
|
||||||
|
s.getLength()));
|
||||||
|
|
||||||
|
in = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||||
|
summary.getCodec(), in);
|
||||||
|
|
||||||
|
String n = s.getName();
|
||||||
|
|
||||||
|
switch (SectionName.fromString(n)) {
|
||||||
|
case NS_INFO:
|
||||||
|
loadNameSystemSection(in);
|
||||||
|
break;
|
||||||
|
case STRING_TABLE:
|
||||||
|
loadStringTableSection(in);
|
||||||
|
break;
|
||||||
|
case INODE: {
|
||||||
|
currentStep = new Step(StepType.INODES);
|
||||||
|
prog.beginStep(Phase.LOADING_FSIMAGE, currentStep);
|
||||||
|
inodeLoader.loadINodeSection(in);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case INODE_REFRENCE:
|
||||||
|
snapshotLoader.loadINodeReferenceSection(in);
|
||||||
|
break;
|
||||||
|
case INODE_DIR:
|
||||||
|
inodeLoader.loadINodeDirectorySection(in);
|
||||||
|
break;
|
||||||
|
case FILES_UNDERCONSTRUCTION:
|
||||||
|
inodeLoader.loadFilesUnderConstructionSection(in);
|
||||||
|
break;
|
||||||
|
case SNAPSHOT:
|
||||||
|
snapshotLoader.loadSnapshotSection(in);
|
||||||
|
break;
|
||||||
|
case SNAPSHOT_DIFF:
|
||||||
|
snapshotLoader.loadSnapshotDiffSection(in);
|
||||||
|
break;
|
||||||
|
case SECRET_MANAGER: {
|
||||||
|
prog.endStep(Phase.LOADING_FSIMAGE, currentStep);
|
||||||
|
Step step = new Step(StepType.DELEGATION_TOKENS);
|
||||||
|
prog.beginStep(Phase.LOADING_FSIMAGE, step);
|
||||||
|
loadSecretManagerSection(in);
|
||||||
|
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CACHE_MANAGER: {
|
||||||
|
Step step = new Step(StepType.CACHE_POOLS);
|
||||||
|
prog.beginStep(Phase.LOADING_FSIMAGE, step);
|
||||||
|
loadCacheManagerSection(in);
|
||||||
|
prog.endStep(Phase.LOADING_FSIMAGE, step);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG.warn("Unregconized section " + n);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadNameSystemSection(InputStream in) throws IOException {
|
||||||
|
NameSystemSection s = NameSystemSection.parseDelimitedFrom(in);
|
||||||
|
fsn.setGenerationStampV1(s.getGenstampV1());
|
||||||
|
fsn.setGenerationStampV2(s.getGenstampV2());
|
||||||
|
fsn.setGenerationStampV1Limit(s.getGenstampV1Limit());
|
||||||
|
fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
|
||||||
|
imgTxId = s.getTransactionId();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadStringTableSection(InputStream in) throws IOException {
|
||||||
|
StringTableSection s = StringTableSection.parseDelimitedFrom(in);
|
||||||
|
ctx.stringTable = new String[s.getNumEntry() + 1];
|
||||||
|
for (int i = 0; i < s.getNumEntry(); ++i) {
|
||||||
|
StringTableSection.Entry e = StringTableSection.Entry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
ctx.stringTable[e.getId()] = e.getStr();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadSecretManagerSection(InputStream in) throws IOException {
|
||||||
|
SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in);
|
||||||
|
int numKeys = s.getNumKeys(), numTokens = s.getNumTokens();
|
||||||
|
ArrayList<SecretManagerSection.DelegationKey> keys = Lists
|
||||||
|
.newArrayListWithCapacity(numKeys);
|
||||||
|
ArrayList<SecretManagerSection.PersistToken> tokens = Lists
|
||||||
|
.newArrayListWithCapacity(numTokens);
|
||||||
|
|
||||||
|
for (int i = 0; i < numKeys; ++i)
|
||||||
|
keys.add(SecretManagerSection.DelegationKey.parseDelimitedFrom(in));
|
||||||
|
|
||||||
|
for (int i = 0; i < numTokens; ++i)
|
||||||
|
tokens.add(SecretManagerSection.PersistToken.parseDelimitedFrom(in));
|
||||||
|
|
||||||
|
fsn.loadSecretManagerState(s, keys, tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadCacheManagerSection(InputStream in) throws IOException {
|
||||||
|
CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(in);
|
||||||
|
ArrayList<CachePoolInfoProto> pools = Lists.newArrayListWithCapacity(s
|
||||||
|
.getNumPools());
|
||||||
|
ArrayList<CacheDirectiveInfoProto> directives = Lists
|
||||||
|
.newArrayListWithCapacity(s.getNumDirectives());
|
||||||
|
for (int i = 0; i < s.getNumPools(); ++i)
|
||||||
|
pools.add(CachePoolInfoProto.parseDelimitedFrom(in));
|
||||||
|
for (int i = 0; i < s.getNumDirectives(); ++i)
|
||||||
|
directives.add(CacheDirectiveInfoProto.parseDelimitedFrom(in));
|
||||||
|
fsn.getCacheManager().loadState(
|
||||||
|
new CacheManager.PersistState(s, pools, directives));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class Saver {
|
||||||
|
public static final int CHECK_CANCEL_INTERVAL = 4096;
|
||||||
|
|
||||||
|
private final SaveNamespaceContext context;
|
||||||
|
private final SaverContext saverContext;
|
||||||
|
private long currentOffset = FSImageUtil.MAGIC_HEADER.length;
|
||||||
|
private MD5Hash savedDigest;
|
||||||
|
|
||||||
|
private FileChannel fileChannel;
|
||||||
|
// OutputStream for the section data
|
||||||
|
private OutputStream sectionOutputStream;
|
||||||
|
private CompressionCodec codec;
|
||||||
|
private OutputStream underlyingOutputStream;
|
||||||
|
|
||||||
|
Saver(SaveNamespaceContext context) {
|
||||||
|
this.context = context;
|
||||||
|
this.saverContext = new SaverContext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public MD5Hash getSavedDigest() {
|
||||||
|
return savedDigest;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SaveNamespaceContext getContext() {
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SaverContext getSaverContext() {
|
||||||
|
return saverContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void commitSection(FileSummary.Builder summary, SectionName name)
|
||||||
|
throws IOException {
|
||||||
|
long oldOffset = currentOffset;
|
||||||
|
flushSectionOutputStream();
|
||||||
|
|
||||||
|
if (codec != null) {
|
||||||
|
sectionOutputStream = codec.createOutputStream(underlyingOutputStream);
|
||||||
|
} else {
|
||||||
|
sectionOutputStream = underlyingOutputStream;
|
||||||
|
}
|
||||||
|
long length = fileChannel.position() - oldOffset;
|
||||||
|
summary.addSections(FileSummary.Section.newBuilder().setName(name.name)
|
||||||
|
.setLength(length).setOffset(currentOffset));
|
||||||
|
currentOffset += length;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void flushSectionOutputStream() throws IOException {
|
||||||
|
if (codec != null) {
|
||||||
|
((CompressorStream) sectionOutputStream).finish();
|
||||||
|
}
|
||||||
|
sectionOutputStream.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
void save(File file, FSImageCompression compression) throws IOException {
|
||||||
|
FileOutputStream fout = new FileOutputStream(file);
|
||||||
|
fileChannel = fout.getChannel();
|
||||||
|
try {
|
||||||
|
saveInternal(fout, compression, file.getAbsolutePath().toString());
|
||||||
|
} finally {
|
||||||
|
fout.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void saveFileSummary(OutputStream out, FileSummary summary)
|
||||||
|
throws IOException {
|
||||||
|
summary.writeDelimitedTo(out);
|
||||||
|
int length = getOndiskTrunkSize(summary);
|
||||||
|
byte[] lengthBytes = new byte[4];
|
||||||
|
ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length);
|
||||||
|
out.write(lengthBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveInodes(FileSummary.Builder summary) throws IOException {
|
||||||
|
FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this,
|
||||||
|
summary);
|
||||||
|
|
||||||
|
saver.serializeINodeSection(sectionOutputStream);
|
||||||
|
saver.serializeINodeDirectorySection(sectionOutputStream);
|
||||||
|
saver.serializeFilesUCSection(sectionOutputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveSnapshots(FileSummary.Builder summary) throws IOException {
|
||||||
|
FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver(
|
||||||
|
this, summary, context, context.getSourceNamesystem());
|
||||||
|
|
||||||
|
snapshotSaver.serializeSnapshotSection(sectionOutputStream);
|
||||||
|
snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream);
|
||||||
|
snapshotSaver.serializeINodeReferenceSection(sectionOutputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveInternal(FileOutputStream fout,
|
||||||
|
FSImageCompression compression, String filePath) throws IOException {
|
||||||
|
StartupProgress prog = NameNode.getStartupProgress();
|
||||||
|
MessageDigest digester = MD5Hash.getDigester();
|
||||||
|
|
||||||
|
underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(
|
||||||
|
fout), digester);
|
||||||
|
underlyingOutputStream.write(FSImageUtil.MAGIC_HEADER);
|
||||||
|
|
||||||
|
fileChannel = fout.getChannel();
|
||||||
|
|
||||||
|
FileSummary.Builder b = FileSummary.newBuilder()
|
||||||
|
.setOndiskVersion(FSImageUtil.FILE_VERSION)
|
||||||
|
.setLayoutVersion(LayoutVersion.getCurrentLayoutVersion());
|
||||||
|
|
||||||
|
codec = compression.getImageCodec();
|
||||||
|
if (codec != null) {
|
||||||
|
b.setCodec(codec.getClass().getCanonicalName());
|
||||||
|
sectionOutputStream = codec.createOutputStream(underlyingOutputStream);
|
||||||
|
} else {
|
||||||
|
sectionOutputStream = underlyingOutputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
saveNameSystemSection(b);
|
||||||
|
// Check for cancellation right after serializing the name system section.
|
||||||
|
// Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace
|
||||||
|
// depends on this behavior.
|
||||||
|
context.checkCancelled();
|
||||||
|
|
||||||
|
Step step = new Step(StepType.INODES, filePath);
|
||||||
|
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
saveInodes(b);
|
||||||
|
saveSnapshots(b);
|
||||||
|
prog.endStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
|
||||||
|
step = new Step(StepType.DELEGATION_TOKENS, filePath);
|
||||||
|
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
saveSecretManagerSection(b);
|
||||||
|
prog.endStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
|
||||||
|
step = new Step(StepType.CACHE_POOLS, filePath);
|
||||||
|
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
saveCacheManagerSection(b);
|
||||||
|
prog.endStep(Phase.SAVING_CHECKPOINT, step);
|
||||||
|
|
||||||
|
saveStringTableSection(b);
|
||||||
|
|
||||||
|
// We use the underlyingOutputStream to write the header. Therefore flush
|
||||||
|
// the buffered stream (which is potentially compressed) first.
|
||||||
|
flushSectionOutputStream();
|
||||||
|
|
||||||
|
FileSummary summary = b.build();
|
||||||
|
saveFileSummary(underlyingOutputStream, summary);
|
||||||
|
underlyingOutputStream.close();
|
||||||
|
savedDigest = new MD5Hash(digester.digest());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveSecretManagerSection(FileSummary.Builder summary)
|
||||||
|
throws IOException {
|
||||||
|
final FSNamesystem fsn = context.getSourceNamesystem();
|
||||||
|
DelegationTokenSecretManager.SecretManagerState state = fsn
|
||||||
|
.saveSecretManagerState();
|
||||||
|
state.section.writeDelimitedTo(sectionOutputStream);
|
||||||
|
for (SecretManagerSection.DelegationKey k : state.keys)
|
||||||
|
k.writeDelimitedTo(sectionOutputStream);
|
||||||
|
|
||||||
|
for (SecretManagerSection.PersistToken t : state.tokens)
|
||||||
|
t.writeDelimitedTo(sectionOutputStream);
|
||||||
|
|
||||||
|
commitSection(summary, SectionName.SECRET_MANAGER);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveCacheManagerSection(FileSummary.Builder summary)
|
||||||
|
throws IOException {
|
||||||
|
final FSNamesystem fsn = context.getSourceNamesystem();
|
||||||
|
CacheManager.PersistState state = fsn.getCacheManager().saveState();
|
||||||
|
state.section.writeDelimitedTo(sectionOutputStream);
|
||||||
|
|
||||||
|
for (CachePoolInfoProto p : state.pools)
|
||||||
|
p.writeDelimitedTo(sectionOutputStream);
|
||||||
|
|
||||||
|
for (CacheDirectiveInfoProto p : state.directives)
|
||||||
|
p.writeDelimitedTo(sectionOutputStream);
|
||||||
|
|
||||||
|
commitSection(summary, SectionName.CACHE_MANAGER);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveNameSystemSection(FileSummary.Builder summary)
|
||||||
|
throws IOException {
|
||||||
|
final FSNamesystem fsn = context.getSourceNamesystem();
|
||||||
|
OutputStream out = sectionOutputStream;
|
||||||
|
NameSystemSection.Builder b = NameSystemSection.newBuilder()
|
||||||
|
.setGenstampV1(fsn.getGenerationStampV1())
|
||||||
|
.setGenstampV1Limit(fsn.getGenerationStampV1Limit())
|
||||||
|
.setGenstampV2(fsn.getGenerationStampV2())
|
||||||
|
.setLastAllocatedBlockId(fsn.getLastAllocatedBlockId())
|
||||||
|
.setTransactionId(context.getTxId());
|
||||||
|
|
||||||
|
// We use the non-locked version of getNamespaceInfo here since
|
||||||
|
// the coordinating thread of saveNamespace already has read-locked
|
||||||
|
// the namespace for us. If we attempt to take another readlock
|
||||||
|
// from the actual saver thread, there's a potential of a
|
||||||
|
// fairness-related deadlock. See the comments on HDFS-2223.
|
||||||
|
b.setNamespaceId(fsn.unprotectedGetNamespaceInfo().getNamespaceID());
|
||||||
|
NameSystemSection s = b.build();
|
||||||
|
s.writeDelimitedTo(out);
|
||||||
|
|
||||||
|
commitSection(summary, SectionName.NS_INFO);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveStringTableSection(FileSummary.Builder summary)
|
||||||
|
throws IOException {
|
||||||
|
OutputStream out = sectionOutputStream;
|
||||||
|
StringTableSection.Builder b = StringTableSection.newBuilder()
|
||||||
|
.setNumEntry(saverContext.stringMap.size());
|
||||||
|
b.build().writeDelimitedTo(out);
|
||||||
|
for (Entry<String, Integer> e : saverContext.stringMap.entrySet()) {
|
||||||
|
StringTableSection.Entry.Builder eb = StringTableSection.Entry
|
||||||
|
.newBuilder().setId(e.getValue()).setStr(e.getKey());
|
||||||
|
eb.build().writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
commitSection(summary, SectionName.STRING_TABLE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Supported section name. The order of the enum determines the order of
|
||||||
|
* loading.
|
||||||
|
*/
|
||||||
|
public enum SectionName {
|
||||||
|
NS_INFO("NS_INFO"),
|
||||||
|
STRING_TABLE("STRING_TABLE"),
|
||||||
|
INODE("INODE"),
|
||||||
|
INODE_REFRENCE("INODE_REFRENCE"),
|
||||||
|
SNAPSHOT("SNAPSHOT"),
|
||||||
|
INODE_DIR("INODE_DIR"),
|
||||||
|
FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION"),
|
||||||
|
SNAPSHOT_DIFF("SNAPSHOT_DIFF"),
|
||||||
|
SECRET_MANAGER("SECRET_MANAGER"),
|
||||||
|
CACHE_MANAGER("CACHE_MANAGER");
|
||||||
|
|
||||||
|
private static final SectionName[] values = SectionName.values();
|
||||||
|
|
||||||
|
public static SectionName fromString(String name) {
|
||||||
|
for (SectionName n : values) {
|
||||||
|
if (n.name.equals(name))
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
private SectionName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getOndiskTrunkSize(com.google.protobuf.GeneratedMessage s) {
|
||||||
|
return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize())
|
||||||
|
+ s.getSerializedSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
private FSImageFormatProtobuf() {
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,93 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.Loader;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public final class FSImageUtil {
|
||||||
|
public static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes();
|
||||||
|
public static final int FILE_VERSION = 1;
|
||||||
|
|
||||||
|
public static boolean checkFileFormat(RandomAccessFile file)
|
||||||
|
throws IOException {
|
||||||
|
if (file.length() < Loader.MINIMUM_FILE_LENGTH)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
byte[] magic = new byte[MAGIC_HEADER.length];
|
||||||
|
file.readFully(magic);
|
||||||
|
if (!Arrays.equals(MAGIC_HEADER, magic))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FileSummary loadSummary(RandomAccessFile file)
|
||||||
|
throws IOException {
|
||||||
|
final int FILE_LENGTH_FIELD_SIZE = 4;
|
||||||
|
long fileLength = file.length();
|
||||||
|
file.seek(fileLength - FILE_LENGTH_FIELD_SIZE);
|
||||||
|
int summaryLength = file.readInt();
|
||||||
|
|
||||||
|
if (summaryLength <= 0) {
|
||||||
|
throw new IOException("Negative length of the file");
|
||||||
|
}
|
||||||
|
file.seek(fileLength - FILE_LENGTH_FIELD_SIZE - summaryLength);
|
||||||
|
|
||||||
|
byte[] summaryBytes = new byte[summaryLength];
|
||||||
|
file.readFully(summaryBytes);
|
||||||
|
|
||||||
|
FileSummary summary = FileSummary
|
||||||
|
.parseDelimitedFrom(new ByteArrayInputStream(summaryBytes));
|
||||||
|
if (summary.getOndiskVersion() != FILE_VERSION) {
|
||||||
|
throw new IOException("Unsupported file version "
|
||||||
|
+ summary.getOndiskVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!LayoutVersion.supports(Feature.PROTOBUF_FORMAT,
|
||||||
|
summary.getLayoutVersion())) {
|
||||||
|
throw new IOException("Unsupported layout version "
|
||||||
|
+ summary.getLayoutVersion());
|
||||||
|
}
|
||||||
|
return summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static InputStream wrapInputStreamForCompression(
|
||||||
|
Configuration conf, String codec, InputStream in) throws IOException {
|
||||||
|
if (codec.isEmpty())
|
||||||
|
return in;
|
||||||
|
|
||||||
|
FSImageCompression compression = FSImageCompression.createCompression(
|
||||||
|
conf, codec);
|
||||||
|
CompressionCodec imageCodec = compression.getImageCodec();
|
||||||
|
return imageCodec.createInputStream(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -181,6 +181,7 @@
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
|
||||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
||||||
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
|
||||||
|
@ -198,6 +199,8 @@
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.common.Util;
|
import org.apache.hadoop.hdfs.server.common.Util;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection.PersistToken;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
|
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
|
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
|
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
|
||||||
|
@ -6006,6 +6009,15 @@ void saveFilesUnderConstruction(DataOutputStream out,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return all the under-construction files in the lease map
|
||||||
|
*/
|
||||||
|
Map<String, INodeFile> getFilesUnderConstruction() {
|
||||||
|
synchronized (leaseManager) {
|
||||||
|
return leaseManager.getINodesUnderConstruction();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register a Backup name-node, verifying that it belongs
|
* Register a Backup name-node, verifying that it belongs
|
||||||
* to the correct namespace, and adding it to the set of
|
* to the correct namespace, and adding it to the set of
|
||||||
|
@ -6282,6 +6294,10 @@ void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
|
||||||
dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
|
dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SecretManagerState saveSecretManagerState() {
|
||||||
|
return dtSecretManager.saveSecretManagerState();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param in load the state of secret manager from input stream
|
* @param in load the state of secret manager from input stream
|
||||||
*/
|
*/
|
||||||
|
@ -6289,6 +6305,12 @@ void loadSecretManagerStateCompat(DataInput in) throws IOException {
|
||||||
dtSecretManager.loadSecretManagerStateCompat(in);
|
dtSecretManager.loadSecretManagerStateCompat(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void loadSecretManagerState(SecretManagerSection s,
|
||||||
|
List<SecretManagerSection.DelegationKey> keys,
|
||||||
|
List<SecretManagerSection.PersistToken> tokens) throws IOException {
|
||||||
|
dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Log the updateMasterKey operation to edit logs
|
* Log the updateMasterKey operation to edit logs
|
||||||
*
|
*
|
||||||
|
@ -6815,6 +6837,11 @@ public boolean isAvoidingStaleDataNodesForWrite() {
|
||||||
.shouldAvoidStaleDataNodesForWrite();
|
.shouldAvoidStaleDataNodesForWrite();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override // FSClusterStats
|
||||||
|
public int getNumDatanodesInService() {
|
||||||
|
return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
|
||||||
|
}
|
||||||
|
|
||||||
public SnapshotManager getSnapshotManager() {
|
public SnapshotManager getSnapshotManager() {
|
||||||
return snapshotManager;
|
return snapshotManager;
|
||||||
}
|
}
|
||||||
|
|
|
@ -171,7 +171,7 @@ private int searchChildren(byte[] name) {
|
||||||
return children == null? -1: Collections.binarySearch(children, name);
|
return children == null? -1: Collections.binarySearch(children, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DirectoryWithSnapshotFeature addSnapshotFeature(
|
public DirectoryWithSnapshotFeature addSnapshotFeature(
|
||||||
DirectoryDiffList diffs) {
|
DirectoryDiffList diffs) {
|
||||||
Preconditions.checkState(!isWithSnapshot(),
|
Preconditions.checkState(!isWithSnapshot(),
|
||||||
"Directory is already with snapshot");
|
"Directory is already with snapshot");
|
||||||
|
|
|
@ -252,7 +252,7 @@ boolean removeLastBlock(Block oldblock) {
|
||||||
|
|
||||||
/* Start of Snapshot Feature */
|
/* Start of Snapshot Feature */
|
||||||
|
|
||||||
private FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) {
|
public FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) {
|
||||||
Preconditions.checkState(!isWithSnapshot(),
|
Preconditions.checkState(!isWithSnapshot(),
|
||||||
"File is already with snapshot");
|
"File is already with snapshot");
|
||||||
FileWithSnapshotFeature sf = new FileWithSnapshotFeature(diffs);
|
FileWithSnapshotFeature sf = new FileWithSnapshotFeature(diffs);
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
|
@ -46,6 +47,10 @@ static INodeMap newInstance(INodeDirectory rootDir) {
|
||||||
/** Synchronized by external lock. */
|
/** Synchronized by external lock. */
|
||||||
private final GSet<INode, INodeWithAdditionalFields> map;
|
private final GSet<INode, INodeWithAdditionalFields> map;
|
||||||
|
|
||||||
|
public Iterator<INodeWithAdditionalFields> getMapIterator() {
|
||||||
|
return map.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
private INodeMap(GSet<INode, INodeWithAdditionalFields> map) {
|
private INodeMap(GSet<INode, INodeWithAdditionalFields> map) {
|
||||||
Preconditions.checkArgument(map != null);
|
Preconditions.checkArgument(map != null);
|
||||||
this.map = map;
|
this.map = map;
|
||||||
|
|
|
@ -17,39 +17,22 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
|
import com.google.common.base.Joiner;
|
||||||
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.security.PrivilegedExceptionAction;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import javax.management.ObjectName;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.HadoopIllegalArgumentException;
|
import org.apache.hadoop.HadoopIllegalArgumentException;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Trash;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
||||||
import org.apache.hadoop.ha.HAServiceStatus;
|
import org.apache.hadoop.ha.HAServiceStatus;
|
||||||
import org.apache.hadoop.ha.HealthCheckFailedException;
|
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||||
import org.apache.hadoop.ha.ServiceFailedException;
|
import org.apache.hadoop.ha.ServiceFailedException;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Trash;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
|
|
||||||
import static org.apache.hadoop.util.ExitUtil.terminate;
|
|
||||||
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.HAUtil;
|
import org.apache.hadoop.hdfs.HAUtil;
|
||||||
|
@ -58,20 +41,11 @@
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
|
import org.apache.hadoop.hdfs.server.namenode.ha.*;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
|
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
|
import org.apache.hadoop.hdfs.server.protocol.*;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
|
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
|
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
import org.apache.hadoop.ipc.StandbyException;
|
import org.apache.hadoop.ipc.StandbyException;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
@ -89,10 +63,23 @@
|
||||||
import org.apache.hadoop.util.ServicePlugin;
|
import org.apache.hadoop.util.ServicePlugin;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import javax.management.ObjectName;
|
||||||
import com.google.common.base.Joiner;
|
import java.io.IOException;
|
||||||
import com.google.common.base.Preconditions;
|
import java.io.PrintStream;
|
||||||
import com.google.common.collect.Lists;
|
import java.net.InetSocketAddress;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
|
||||||
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
|
||||||
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
|
||||||
|
import static org.apache.hadoop.util.ExitUtil.terminate;
|
||||||
|
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
|
||||||
|
|
||||||
/**********************************************************
|
/**********************************************************
|
||||||
* NameNode serves as both directory namespace manager and
|
* NameNode serves as both directory namespace manager and
|
||||||
|
@ -183,8 +170,10 @@ public static enum OperationCategory {
|
||||||
DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
|
DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
|
||||||
DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
|
DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
|
||||||
DFS_NAMENODE_HTTP_ADDRESS_KEY,
|
DFS_NAMENODE_HTTP_ADDRESS_KEY,
|
||||||
|
DFS_NAMENODE_HTTPS_ADDRESS_KEY,
|
||||||
DFS_NAMENODE_KEYTAB_FILE_KEY,
|
DFS_NAMENODE_KEYTAB_FILE_KEY,
|
||||||
DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
|
DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
|
||||||
|
DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
|
||||||
DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
|
DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
|
||||||
DFS_NAMENODE_BACKUP_ADDRESS_KEY,
|
DFS_NAMENODE_BACKUP_ADDRESS_KEY,
|
||||||
DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
|
DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
@ -40,9 +41,12 @@
|
||||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.hdfs.BlockReader;
|
import org.apache.hadoop.hdfs.BlockReader;
|
||||||
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
||||||
|
import org.apache.hadoop.hdfs.ClientContext;
|
||||||
import org.apache.hadoop.hdfs.DFSClient;
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
|
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
|
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
|
||||||
|
@ -569,11 +573,10 @@ private void copyBlock(DFSClient dfs, LocatedBlock lblock,
|
||||||
int failures = 0;
|
int failures = 0;
|
||||||
InetSocketAddress targetAddr = null;
|
InetSocketAddress targetAddr = null;
|
||||||
TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
|
TreeSet<DatanodeInfo> deadNodes = new TreeSet<DatanodeInfo>();
|
||||||
Socket s = null;
|
|
||||||
BlockReader blockReader = null;
|
BlockReader blockReader = null;
|
||||||
ExtendedBlock block = lblock.getBlock();
|
ExtendedBlock block = lblock.getBlock();
|
||||||
|
|
||||||
while (s == null) {
|
while (blockReader == null) {
|
||||||
DatanodeInfo chosenNode;
|
DatanodeInfo chosenNode;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -593,34 +596,47 @@ private void copyBlock(DFSClient dfs, LocatedBlock lblock,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
String file = BlockReaderFactory.getFileName(targetAddr,
|
||||||
s.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
|
block.getBlockPoolId(), block.getBlockId());
|
||||||
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
blockReader = new BlockReaderFactory(dfs.getConf()).
|
||||||
|
setFileName(file).
|
||||||
String file = BlockReaderFactory.getFileName(targetAddr, block.getBlockPoolId(),
|
setBlock(block).
|
||||||
block.getBlockId());
|
setBlockToken(lblock.getBlockToken()).
|
||||||
blockReader = BlockReaderFactory.newBlockReader(dfs.getConf(),
|
setStartOffset(0).
|
||||||
file, block, lblock.getBlockToken(), 0, -1, true, "fsck",
|
setLength(-1).
|
||||||
TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer().
|
setVerifyChecksum(true).
|
||||||
getDataEncryptionKey()), chosenNode, null, null, null,
|
setClientName("fsck").
|
||||||
false, CachingStrategy.newDropBehind());
|
setDatanodeInfo(chosenNode).
|
||||||
|
setInetSocketAddress(targetAddr).
|
||||||
|
setCachingStrategy(CachingStrategy.newDropBehind()).
|
||||||
|
setClientCacheContext(dfs.getClientContext()).
|
||||||
|
setConfiguration(namenode.conf).
|
||||||
|
setRemotePeerFactory(new RemotePeerFactory() {
|
||||||
|
@Override
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr)
|
||||||
|
throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
||||||
|
try {
|
||||||
|
s.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
peer = TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer().
|
||||||
|
getDataEncryptionKey());
|
||||||
|
} finally {
|
||||||
|
if (peer == null) {
|
||||||
|
IOUtils.closeQuietly(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}).
|
||||||
|
build();
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
// Put chosen node into dead list, continue
|
// Put chosen node into dead list, continue
|
||||||
LOG.info("Failed to connect to " + targetAddr + ":" + ex);
|
LOG.info("Failed to connect to " + targetAddr + ":" + ex);
|
||||||
deadNodes.add(chosenNode);
|
deadNodes.add(chosenNode);
|
||||||
if (s != null) {
|
|
||||||
try {
|
|
||||||
s.close();
|
|
||||||
} catch (IOException iex) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s = null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (blockReader == null) {
|
|
||||||
throw new Exception("Could not open data stream for " + lblock.getBlock());
|
|
||||||
}
|
|
||||||
byte[] buf = new byte[1024];
|
byte[] buf = new byte[1024];
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
boolean success = true;
|
boolean success = true;
|
||||||
|
@ -638,10 +654,11 @@ private void copyBlock(DFSClient dfs, LocatedBlock lblock,
|
||||||
LOG.error("Error reading block", e);
|
LOG.error("Error reading block", e);
|
||||||
success = false;
|
success = false;
|
||||||
} finally {
|
} finally {
|
||||||
try {s.close(); } catch (Exception e1) {}
|
blockReader.close();
|
||||||
}
|
}
|
||||||
if (!success)
|
if (!success) {
|
||||||
throw new Exception("Could not copy block data for " + lblock.getBlock());
|
throw new Exception("Could not copy block data for " + lblock.getBlock());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.util.Canceler;
|
import org.apache.hadoop.hdfs.util.Canceler;
|
||||||
|
|
||||||
|
@ -32,7 +33,8 @@
|
||||||
* allows cancellation, and also is responsible for accumulating
|
* allows cancellation, and also is responsible for accumulating
|
||||||
* failed storage directories.
|
* failed storage directories.
|
||||||
*/
|
*/
|
||||||
class SaveNamespaceContext {
|
@InterfaceAudience.Private
|
||||||
|
public class SaveNamespaceContext {
|
||||||
private final FSNamesystem sourceNamesystem;
|
private final FSNamesystem sourceNamesystem;
|
||||||
private final long txid;
|
private final long txid;
|
||||||
private final List<StorageDirectory> errorSDs =
|
private final List<StorageDirectory> errorSDs =
|
||||||
|
@ -72,7 +74,7 @@ void markComplete() {
|
||||||
completionLatch.countDown();
|
completionLatch.countDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
void checkCancelled() throws SaveNamespaceCancelledException {
|
public void checkCancelled() throws SaveNamespaceCancelledException {
|
||||||
if (canceller.isCancelled()) {
|
if (canceller.isCancelled()) {
|
||||||
throw new SaveNamespaceCancelledException(
|
throw new SaveNamespaceCancelledException(
|
||||||
canceller.getCancellationReason());
|
canceller.getCancellationReason());
|
||||||
|
|
|
@ -244,7 +244,7 @@ private DirectoryDiff(int snapshotId, INodeDirectory dir) {
|
||||||
this.isSnapshotRoot = isSnapshotRoot;
|
this.isSnapshotRoot = isSnapshotRoot;
|
||||||
}
|
}
|
||||||
|
|
||||||
ChildrenDiff getChildrenDiff() {
|
public ChildrenDiff getChildrenDiff() {
|
||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -343,6 +343,10 @@ public String toString() {
|
||||||
return super.toString() + " childrenSize=" + childrenSize + ", " + diff;
|
return super.toString() + " childrenSize=" + childrenSize + ", " + diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getChildrenSize() {
|
||||||
|
return childrenSize;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
|
void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
|
||||||
writeSnapshot(out);
|
writeSnapshot(out);
|
||||||
|
|
|
@ -0,0 +1,506 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode.snapshot;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadINodeDirectory;
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadPermission;
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.updateBlocksMap;
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeDirectory;
|
||||||
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeFile;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeReferenceSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.CreatedListEntry;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.DiffEntry.Type;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeMap;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeReference;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeWithAdditionalFields;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceContext;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root;
|
||||||
|
import org.apache.hadoop.hdfs.util.Diff.ListType;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.protobuf.ByteString;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class FSImageFormatPBSnapshot {
|
||||||
|
/**
|
||||||
|
* Loading snapshot related information from protobuf based FSImage
|
||||||
|
*/
|
||||||
|
public final static class Loader {
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
private final FSDirectory fsDir;
|
||||||
|
private final FSImageFormatProtobuf.Loader parent;
|
||||||
|
private final Map<Integer, Snapshot> snapshotMap;
|
||||||
|
|
||||||
|
public Loader(FSNamesystem fsn, FSImageFormatProtobuf.Loader parent) {
|
||||||
|
this.fsn = fsn;
|
||||||
|
this.fsDir = fsn.getFSDirectory();
|
||||||
|
this.snapshotMap = new HashMap<Integer, Snapshot>();
|
||||||
|
this.parent = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The sequence of the ref node in refList must be strictly the same with
|
||||||
|
* the sequence in fsimage
|
||||||
|
*/
|
||||||
|
public void loadINodeReferenceSection(InputStream in) throws IOException {
|
||||||
|
final List<INodeReference> refList = parent.getLoaderContext()
|
||||||
|
.getRefList();
|
||||||
|
while (true) {
|
||||||
|
INodeReferenceSection.INodeReference e = INodeReferenceSection
|
||||||
|
.INodeReference.parseDelimitedFrom(in);
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
INodeReference ref = loadINodeReference(e);
|
||||||
|
refList.add(ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private INodeReference loadINodeReference(
|
||||||
|
INodeReferenceSection.INodeReference r) throws IOException {
|
||||||
|
long referredId = r.getReferredId();
|
||||||
|
INode referred = fsDir.getInode(referredId);
|
||||||
|
WithCount withCount = (WithCount) referred.getParentReference();
|
||||||
|
if (withCount == null) {
|
||||||
|
withCount = new INodeReference.WithCount(null, referred);
|
||||||
|
}
|
||||||
|
final INodeReference ref;
|
||||||
|
if (r.hasDstSnapshotId()) { // DstReference
|
||||||
|
ref = new INodeReference.DstReference(null, withCount,
|
||||||
|
r.getDstSnapshotId());
|
||||||
|
} else {
|
||||||
|
ref = new INodeReference.WithName(null, withCount, r.getName()
|
||||||
|
.toByteArray(), r.getLastSnapshotId());
|
||||||
|
}
|
||||||
|
return ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the snapshots section from fsimage. Also convert snapshottable
|
||||||
|
* directories into {@link INodeDirectorySnapshottable}.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void loadSnapshotSection(InputStream in) throws IOException {
|
||||||
|
SnapshotManager sm = fsn.getSnapshotManager();
|
||||||
|
SnapshotSection section = SnapshotSection.parseDelimitedFrom(in);
|
||||||
|
int snum = section.getNumSnapshots();
|
||||||
|
sm.setNumSnapshots(snum);
|
||||||
|
sm.setSnapshotCounter(section.getSnapshotCounter());
|
||||||
|
for (long sdirId : section.getSnapshottableDirList()) {
|
||||||
|
INodeDirectory dir = fsDir.getInode(sdirId).asDirectory();
|
||||||
|
final INodeDirectorySnapshottable sdir;
|
||||||
|
if (!dir.isSnapshottable()) {
|
||||||
|
sdir = new INodeDirectorySnapshottable(dir);
|
||||||
|
fsDir.addToInodeMap(sdir);
|
||||||
|
} else {
|
||||||
|
// dir is root, and admin set root to snapshottable before
|
||||||
|
sdir = (INodeDirectorySnapshottable) dir;
|
||||||
|
sdir.setSnapshotQuota(INodeDirectorySnapshottable.SNAPSHOT_LIMIT);
|
||||||
|
}
|
||||||
|
sm.addSnapshottable(sdir);
|
||||||
|
}
|
||||||
|
loadSnapshots(in, snum);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadSnapshots(InputStream in, int size) throws IOException {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
INodeDirectory root = loadINodeDirectory(pbs.getRoot(),
|
||||||
|
parent.getLoaderContext().getStringTable());
|
||||||
|
int sid = pbs.getSnapshotId();
|
||||||
|
INodeDirectorySnapshottable parent = (INodeDirectorySnapshottable) fsDir
|
||||||
|
.getInode(root.getId()).asDirectory();
|
||||||
|
Snapshot snapshot = new Snapshot(sid, root, parent);
|
||||||
|
// add the snapshot to parent, since we follow the sequence of
|
||||||
|
// snapshotsByNames when saving, we do not need to sort when loading
|
||||||
|
parent.addSnapshot(snapshot);
|
||||||
|
snapshotMap.put(sid, snapshot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the snapshot diff section from fsimage.
|
||||||
|
*/
|
||||||
|
public void loadSnapshotDiffSection(InputStream in) throws IOException {
|
||||||
|
final List<INodeReference> refList = parent.getLoaderContext()
|
||||||
|
.getRefList();
|
||||||
|
while (true) {
|
||||||
|
SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
if (entry == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
long inodeId = entry.getInodeId();
|
||||||
|
INode inode = fsDir.getInode(inodeId);
|
||||||
|
SnapshotDiffSection.DiffEntry.Type type = entry.getType();
|
||||||
|
switch (type) {
|
||||||
|
case FILEDIFF:
|
||||||
|
loadFileDiffList(in, inode.asFile(), entry.getNumOfDiff());
|
||||||
|
break;
|
||||||
|
case DIRECTORYDIFF:
|
||||||
|
loadDirectoryDiffList(in, inode.asDirectory(), entry.getNumOfDiff(),
|
||||||
|
refList);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Load FileDiff list for a file with snapshot feature */
|
||||||
|
private void loadFileDiffList(InputStream in, INodeFile file, int size)
|
||||||
|
throws IOException {
|
||||||
|
final FileDiffList diffs = new FileDiffList();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
SnapshotDiffSection.FileDiff pbf = SnapshotDiffSection.FileDiff
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
INodeFileAttributes copy = null;
|
||||||
|
if (pbf.hasSnapshotCopy()) {
|
||||||
|
INodeSection.INodeFile fileInPb = pbf.getSnapshotCopy();
|
||||||
|
PermissionStatus permission = loadPermission(
|
||||||
|
fileInPb.getPermission(), parent.getLoaderContext()
|
||||||
|
.getStringTable());
|
||||||
|
copy = new INodeFileAttributes.SnapshotCopy(pbf.getName()
|
||||||
|
.toByteArray(), permission, fileInPb.getModificationTime(),
|
||||||
|
fileInPb.getAccessTime(), (short) fileInPb.getReplication(),
|
||||||
|
fileInPb.getPreferredBlockSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
FileDiff diff = new FileDiff(pbf.getSnapshotId(), copy, null,
|
||||||
|
pbf.getFileSize());
|
||||||
|
diffs.addFirst(diff);
|
||||||
|
}
|
||||||
|
file.addSnapshotFeature(diffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Load the created list in a DirectoryDiff */
|
||||||
|
private List<INode> loadCreatedList(InputStream in, INodeDirectory dir,
|
||||||
|
int size) throws IOException {
|
||||||
|
List<INode> clist = new ArrayList<INode>(size);
|
||||||
|
for (long c = 0; c < size; c++) {
|
||||||
|
CreatedListEntry entry = CreatedListEntry.parseDelimitedFrom(in);
|
||||||
|
INode created = SnapshotFSImageFormat.loadCreated(entry.getName()
|
||||||
|
.toByteArray(), dir);
|
||||||
|
clist.add(created);
|
||||||
|
}
|
||||||
|
return clist;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToDeletedList(INode dnode, INodeDirectory parent) {
|
||||||
|
dnode.setParent(parent);
|
||||||
|
if (dnode.isFile()) {
|
||||||
|
updateBlocksMap(dnode.asFile(), fsn.getBlockManager());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the deleted list in a DirectoryDiff
|
||||||
|
*/
|
||||||
|
private List<INode> loadDeletedList(final List<INodeReference> refList,
|
||||||
|
InputStream in, INodeDirectory dir, List<Long> deletedNodes,
|
||||||
|
List<Integer> deletedRefNodes)
|
||||||
|
throws IOException {
|
||||||
|
List<INode> dlist = new ArrayList<INode>(deletedRefNodes.size()
|
||||||
|
+ deletedNodes.size());
|
||||||
|
// load non-reference inodes
|
||||||
|
for (long deletedId : deletedNodes) {
|
||||||
|
INode deleted = fsDir.getInode(deletedId);
|
||||||
|
dlist.add(deleted);
|
||||||
|
addToDeletedList(deleted, dir);
|
||||||
|
}
|
||||||
|
// load reference nodes in the deleted list
|
||||||
|
for (int refId : deletedRefNodes) {
|
||||||
|
INodeReference deletedRef = refList.get(refId);
|
||||||
|
dlist.add(deletedRef);
|
||||||
|
addToDeletedList(deletedRef, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort(dlist, new Comparator<INode>() {
|
||||||
|
@Override
|
||||||
|
public int compare(INode n1, INode n2) {
|
||||||
|
return n1.compareTo(n2.getLocalNameBytes());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return dlist;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Load DirectoryDiff list for a directory with snapshot feature */
|
||||||
|
private void loadDirectoryDiffList(InputStream in, INodeDirectory dir,
|
||||||
|
int size, final List<INodeReference> refList) throws IOException {
|
||||||
|
if (!dir.isWithSnapshot()) {
|
||||||
|
dir.addSnapshotFeature(null);
|
||||||
|
}
|
||||||
|
DirectoryDiffList diffs = dir.getDiffs();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
// load a directory diff
|
||||||
|
SnapshotDiffSection.DirectoryDiff diffInPb = SnapshotDiffSection.
|
||||||
|
DirectoryDiff.parseDelimitedFrom(in);
|
||||||
|
final int snapshotId = diffInPb.getSnapshotId();
|
||||||
|
final Snapshot snapshot = snapshotMap.get(snapshotId);
|
||||||
|
int childrenSize = diffInPb.getChildrenSize();
|
||||||
|
boolean useRoot = diffInPb.getIsSnapshotRoot();
|
||||||
|
INodeDirectoryAttributes copy = null;
|
||||||
|
if (useRoot) {
|
||||||
|
copy = snapshot.getRoot();
|
||||||
|
} else if (diffInPb.hasSnapshotCopy()) {
|
||||||
|
INodeSection.INodeDirectory dirCopyInPb = diffInPb.getSnapshotCopy();
|
||||||
|
final byte[] name = diffInPb.getName().toByteArray();
|
||||||
|
PermissionStatus permission = loadPermission(
|
||||||
|
dirCopyInPb.getPermission(), parent.getLoaderContext()
|
||||||
|
.getStringTable());
|
||||||
|
long modTime = dirCopyInPb.getModificationTime();
|
||||||
|
boolean noQuota = dirCopyInPb.getNsQuota() == -1
|
||||||
|
&& dirCopyInPb.getDsQuota() == -1;
|
||||||
|
copy = noQuota ? new INodeDirectoryAttributes.SnapshotCopy(name,
|
||||||
|
permission, modTime)
|
||||||
|
: new INodeDirectoryAttributes.CopyWithQuota(name, permission,
|
||||||
|
modTime, dirCopyInPb.getNsQuota(), dirCopyInPb.getDsQuota());
|
||||||
|
}
|
||||||
|
// load created list
|
||||||
|
List<INode> clist = loadCreatedList(in, dir,
|
||||||
|
diffInPb.getCreatedListSize());
|
||||||
|
// load deleted list
|
||||||
|
List<INode> dlist = loadDeletedList(refList, in, dir,
|
||||||
|
diffInPb.getDeletedINodeList(), diffInPb.getDeletedINodeRefList());
|
||||||
|
// create the directory diff
|
||||||
|
DirectoryDiff diff = new DirectoryDiff(snapshotId, copy, null,
|
||||||
|
childrenSize, clist, dlist, useRoot);
|
||||||
|
diffs.addFirst(diff);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Saving snapshot related information to protobuf based FSImage
|
||||||
|
*/
|
||||||
|
public final static class Saver {
|
||||||
|
private final FSNamesystem fsn;
|
||||||
|
private final FileSummary.Builder headers;
|
||||||
|
private final FSImageFormatProtobuf.Saver parent;
|
||||||
|
private final SaveNamespaceContext context;
|
||||||
|
|
||||||
|
public Saver(FSImageFormatProtobuf.Saver parent,
|
||||||
|
FileSummary.Builder headers, SaveNamespaceContext context,
|
||||||
|
FSNamesystem fsn) {
|
||||||
|
this.parent = parent;
|
||||||
|
this.headers = headers;
|
||||||
|
this.context = context;
|
||||||
|
this.fsn = fsn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* save all the snapshottable directories and snapshots to fsimage
|
||||||
|
*/
|
||||||
|
public void serializeSnapshotSection(OutputStream out) throws IOException {
|
||||||
|
SnapshotManager sm = fsn.getSnapshotManager();
|
||||||
|
SnapshotSection.Builder b = SnapshotSection.newBuilder()
|
||||||
|
.setSnapshotCounter(sm.getSnapshotCounter())
|
||||||
|
.setNumSnapshots(sm.getNumSnapshots());
|
||||||
|
|
||||||
|
INodeDirectorySnapshottable[] snapshottables = sm.getSnapshottableDirs();
|
||||||
|
for (INodeDirectorySnapshottable sdir : snapshottables) {
|
||||||
|
b.addSnapshottableDir(sdir.getId());
|
||||||
|
}
|
||||||
|
b.build().writeDelimitedTo(out);
|
||||||
|
int i = 0;
|
||||||
|
for(INodeDirectorySnapshottable sdir : snapshottables) {
|
||||||
|
for(Snapshot s : sdir.getSnapshotsByNames()) {
|
||||||
|
Root sroot = s.getRoot();
|
||||||
|
SnapshotSection.Snapshot.Builder sb = SnapshotSection.Snapshot
|
||||||
|
.newBuilder().setSnapshotId(s.getId());
|
||||||
|
INodeSection.INodeDirectory.Builder db = buildINodeDirectory(sroot,
|
||||||
|
parent.getSaverContext().getStringMap());
|
||||||
|
INodeSection.INode r = INodeSection.INode.newBuilder()
|
||||||
|
.setId(sroot.getId())
|
||||||
|
.setType(INodeSection.INode.Type.DIRECTORY)
|
||||||
|
.setName(ByteString.copyFrom(sroot.getLocalNameBytes()))
|
||||||
|
.setDirectory(db).build();
|
||||||
|
sb.setRoot(r).build().writeDelimitedTo(out);
|
||||||
|
i++;
|
||||||
|
if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) {
|
||||||
|
context.checkCancelled();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Preconditions.checkState(i == sm.getNumSnapshots());
|
||||||
|
parent.commitSection(headers, FSImageFormatProtobuf.SectionName.SNAPSHOT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This can only be called after serializing both INode_Dir and SnapshotDiff
|
||||||
|
*/
|
||||||
|
public void serializeINodeReferenceSection(OutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
final List<INodeReference> refList = parent.getSaverContext()
|
||||||
|
.getRefList();
|
||||||
|
for (INodeReference ref : refList) {
|
||||||
|
INodeReferenceSection.INodeReference.Builder rb = buildINodeReference(ref);
|
||||||
|
rb.build().writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
parent.commitSection(headers, SectionName.INODE_REFRENCE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private INodeReferenceSection.INodeReference.Builder buildINodeReference(
|
||||||
|
INodeReference ref) throws IOException {
|
||||||
|
INodeReferenceSection.INodeReference.Builder rb =
|
||||||
|
INodeReferenceSection.INodeReference.newBuilder().
|
||||||
|
setReferredId(ref.getId());
|
||||||
|
if (ref instanceof WithName) {
|
||||||
|
rb.setLastSnapshotId(((WithName) ref).getLastSnapshotId()).setName(
|
||||||
|
ByteString.copyFrom(ref.getLocalNameBytes()));
|
||||||
|
} else if (ref instanceof DstReference) {
|
||||||
|
rb.setDstSnapshotId(((DstReference) ref).getDstSnapshotId());
|
||||||
|
}
|
||||||
|
return rb;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* save all the snapshot diff to fsimage
|
||||||
|
*/
|
||||||
|
public void serializeSnapshotDiffSection(OutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
INodeMap inodesMap = fsn.getFSDirectory().getINodeMap();
|
||||||
|
final List<INodeReference> refList = parent.getSaverContext()
|
||||||
|
.getRefList();
|
||||||
|
int i = 0;
|
||||||
|
Iterator<INodeWithAdditionalFields> iter = inodesMap.getMapIterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
INodeWithAdditionalFields inode = iter.next();
|
||||||
|
if (inode.isFile()) {
|
||||||
|
serializeFileDiffList(inode.asFile(), out);
|
||||||
|
} else if (inode.isDirectory()) {
|
||||||
|
serializeDirDiffList(inode.asDirectory(), refList, out);
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) {
|
||||||
|
context.checkCancelled();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parent.commitSection(headers,
|
||||||
|
FSImageFormatProtobuf.SectionName.SNAPSHOT_DIFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void serializeFileDiffList(INodeFile file, OutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
FileWithSnapshotFeature sf = file.getFileWithSnapshotFeature();
|
||||||
|
if (sf != null) {
|
||||||
|
List<FileDiff> diffList = sf.getDiffs().asList();
|
||||||
|
SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry
|
||||||
|
.newBuilder().setInodeId(file.getId()).setType(Type.FILEDIFF)
|
||||||
|
.setNumOfDiff(diffList.size()).build();
|
||||||
|
entry.writeDelimitedTo(out);
|
||||||
|
for (int i = diffList.size() - 1; i >= 0; i--) {
|
||||||
|
FileDiff diff = diffList.get(i);
|
||||||
|
SnapshotDiffSection.FileDiff.Builder fb = SnapshotDiffSection.FileDiff
|
||||||
|
.newBuilder().setSnapshotId(diff.getSnapshotId())
|
||||||
|
.setFileSize(diff.getFileSize());
|
||||||
|
INodeFileAttributes copy = diff.snapshotINode;
|
||||||
|
if (copy != null) {
|
||||||
|
fb.setName(ByteString.copyFrom(copy.getLocalNameBytes()))
|
||||||
|
.setSnapshotCopy(buildINodeFile(copy, parent.getSaverContext().getStringMap()));
|
||||||
|
}
|
||||||
|
fb.build().writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveCreatedList(List<INode> created, OutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
// local names of the created list member
|
||||||
|
for (INode c : created) {
|
||||||
|
SnapshotDiffSection.CreatedListEntry.newBuilder()
|
||||||
|
.setName(ByteString.copyFrom(c.getLocalNameBytes())).build()
|
||||||
|
.writeDelimitedTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void serializeDirDiffList(INodeDirectory dir,
|
||||||
|
final List<INodeReference> refList, OutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature();
|
||||||
|
if (sf != null) {
|
||||||
|
List<DirectoryDiff> diffList = sf.getDiffs().asList();
|
||||||
|
SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry
|
||||||
|
.newBuilder().setInodeId(dir.getId()).setType(Type.DIRECTORYDIFF)
|
||||||
|
.setNumOfDiff(diffList.size()).build();
|
||||||
|
entry.writeDelimitedTo(out);
|
||||||
|
for (int i = diffList.size() - 1; i >= 0; i--) { // reverse order!
|
||||||
|
DirectoryDiff diff = diffList.get(i);
|
||||||
|
SnapshotDiffSection.DirectoryDiff.Builder db = SnapshotDiffSection.
|
||||||
|
DirectoryDiff.newBuilder().setSnapshotId(diff.getSnapshotId())
|
||||||
|
.setChildrenSize(diff.getChildrenSize())
|
||||||
|
.setIsSnapshotRoot(diff.isSnapshotRoot());
|
||||||
|
INodeDirectoryAttributes copy = diff.snapshotINode;
|
||||||
|
if (!diff.isSnapshotRoot() && copy != null) {
|
||||||
|
db.setName(ByteString.copyFrom(copy.getLocalNameBytes()))
|
||||||
|
.setSnapshotCopy(
|
||||||
|
buildINodeDirectory(copy, parent.getSaverContext().getStringMap()));
|
||||||
|
}
|
||||||
|
// process created list and deleted list
|
||||||
|
List<INode> created = diff.getChildrenDiff()
|
||||||
|
.getList(ListType.CREATED);
|
||||||
|
db.setCreatedListSize(created.size());
|
||||||
|
List<INode> deleted = diff.getChildrenDiff().getList(ListType.DELETED);
|
||||||
|
for (INode d : deleted) {
|
||||||
|
if (d.isReference()) {
|
||||||
|
refList.add(d.asReference());
|
||||||
|
db.addDeletedINodeRef(refList.size() - 1);
|
||||||
|
} else {
|
||||||
|
db.addDeletedINode(d.getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
db.build().writeDelimitedTo(out);
|
||||||
|
saveCreatedList(created, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private FSImageFormatPBSnapshot(){}
|
||||||
|
}
|
|
@ -27,7 +27,6 @@
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
|
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INode;
|
import org.apache.hadoop.hdfs.server.namenode.INode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
|
import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
|
||||||
|
@ -137,7 +136,7 @@ private static FileDiff loadFileDiff(FileDiff posterior, DataInput in,
|
||||||
* @param parent The directory that the created list belongs to.
|
* @param parent The directory that the created list belongs to.
|
||||||
* @return The created node.
|
* @return The created node.
|
||||||
*/
|
*/
|
||||||
private static INode loadCreated(byte[] createdNodeName,
|
public static INode loadCreated(byte[] createdNodeName,
|
||||||
INodeDirectory parent) throws IOException {
|
INodeDirectory parent) throws IOException {
|
||||||
// the INode in the created list should be a reference to another INode
|
// the INode in the created list should be a reference to another INode
|
||||||
// in posterior SnapshotDiffs or one of the current children
|
// in posterior SnapshotDiffs or one of the current children
|
||||||
|
@ -209,11 +208,13 @@ private static List<INode> loadDeletedList(INodeDirectory parent,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load snapshots and snapshotQuota for a Snapshottable directory.
|
* Load snapshots and snapshotQuota for a Snapshottable directory.
|
||||||
* @param snapshottableParent The snapshottable directory for loading.
|
*
|
||||||
* @param numSnapshots The number of snapshots that the directory has.
|
* @param snapshottableParent
|
||||||
* @param in The {@link DataInput} instance to read.
|
* The snapshottable directory for loading.
|
||||||
* @param loader The {@link Loader} instance that this loading procedure is
|
* @param numSnapshots
|
||||||
* using.
|
* The number of snapshots that the directory has.
|
||||||
|
* @param loader
|
||||||
|
* The loader
|
||||||
*/
|
*/
|
||||||
public static void loadSnapshotList(
|
public static void loadSnapshotList(
|
||||||
INodeDirectorySnapshottable snapshottableParent, int numSnapshots,
|
INodeDirectorySnapshottable snapshottableParent, int numSnapshots,
|
||||||
|
@ -231,10 +232,13 @@ public static void loadSnapshotList(
|
||||||
/**
|
/**
|
||||||
* Load the {@link SnapshotDiff} list for the INodeDirectoryWithSnapshot
|
* Load the {@link SnapshotDiff} list for the INodeDirectoryWithSnapshot
|
||||||
* directory.
|
* directory.
|
||||||
* @param dir The snapshottable directory for loading.
|
*
|
||||||
* @param in The {@link DataInput} instance to read.
|
* @param dir
|
||||||
* @param loader The {@link Loader} instance that this loading procedure is
|
* The snapshottable directory for loading.
|
||||||
* using.
|
* @param in
|
||||||
|
* The {@link DataInput} instance to read.
|
||||||
|
* @param loader
|
||||||
|
* The loader
|
||||||
*/
|
*/
|
||||||
public static void loadDirectoryDiffList(INodeDirectory dir,
|
public static void loadDirectoryDiffList(INodeDirectory dir,
|
||||||
DataInput in, FSImageFormat.Loader loader) throws IOException {
|
DataInput in, FSImageFormat.Loader loader) throws IOException {
|
||||||
|
|
|
@ -270,6 +270,23 @@ public int getNumSnapshots() {
|
||||||
return numSnapshots.get();
|
return numSnapshots.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setNumSnapshots(int num) {
|
||||||
|
numSnapshots.set(num);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getSnapshotCounter() {
|
||||||
|
return snapshotCounter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setSnapshotCounter(int counter) {
|
||||||
|
snapshotCounter = counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
INodeDirectorySnapshottable[] getSnapshottableDirs() {
|
||||||
|
return snapshottables.values().toArray(
|
||||||
|
new INodeDirectorySnapshottable[snapshottables.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write {@link #snapshotCounter}, {@link #numSnapshots},
|
* Write {@link #snapshotCounter}, {@link #numSnapshots},
|
||||||
* and all snapshots to the DataOutput.
|
* and all snapshots to the DataOutput.
|
||||||
|
|
|
@ -107,6 +107,7 @@
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Charsets;
|
import com.google.common.base.Charsets;
|
||||||
import com.sun.jersey.spi.container.ResourceFilters;
|
import com.sun.jersey.spi.container.ResourceFilters;
|
||||||
|
|
||||||
|
@ -160,9 +161,10 @@ private void init(final UserGroupInformation ugi,
|
||||||
response.setContentType(null);
|
response.setContentType(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
static DatanodeInfo chooseDatanode(final NameNode namenode,
|
static DatanodeInfo chooseDatanode(final NameNode namenode,
|
||||||
final String path, final HttpOpParam.Op op, final long openOffset,
|
final String path, final HttpOpParam.Op op, final long openOffset,
|
||||||
final long blocksize, final Configuration conf) throws IOException {
|
final long blocksize) throws IOException {
|
||||||
final BlockManager bm = namenode.getNamesystem().getBlockManager();
|
final BlockManager bm = namenode.getNamesystem().getBlockManager();
|
||||||
|
|
||||||
if (op == PutOpParam.Op.CREATE) {
|
if (op == PutOpParam.Op.CREATE) {
|
||||||
|
@ -201,7 +203,7 @@ static DatanodeInfo chooseDatanode(final NameNode namenode,
|
||||||
final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
|
final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
|
||||||
final int count = locations.locatedBlockCount();
|
final int count = locations.locatedBlockCount();
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
return JspHelper.bestNode(locations.get(0).getLocations(), false, conf);
|
return bestNode(locations.get(0).getLocations());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,13 +212,26 @@ static DatanodeInfo chooseDatanode(final NameNode namenode,
|
||||||
).chooseRandom(NodeBase.ROOT);
|
).chooseRandom(NodeBase.ROOT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Choose the datanode to redirect the request. Note that the nodes have been
|
||||||
|
* sorted based on availability and network distances, thus it is sufficient
|
||||||
|
* to return the first element of the node here.
|
||||||
|
*/
|
||||||
|
private static DatanodeInfo bestNode(DatanodeInfo[] nodes) throws IOException {
|
||||||
|
if (nodes.length == 0 || nodes[0].isDecommissioned()) {
|
||||||
|
throw new IOException("No active nodes contain this block");
|
||||||
|
}
|
||||||
|
return nodes[0];
|
||||||
|
}
|
||||||
|
|
||||||
private Token<? extends TokenIdentifier> generateDelegationToken(
|
private Token<? extends TokenIdentifier> generateDelegationToken(
|
||||||
final NameNode namenode, final UserGroupInformation ugi,
|
final NameNode namenode, final UserGroupInformation ugi,
|
||||||
final String renewer) throws IOException {
|
final String renewer) throws IOException {
|
||||||
final Credentials c = DelegationTokenSecretManager.createCredentials(
|
final Credentials c = DelegationTokenSecretManager.createCredentials(
|
||||||
namenode, ugi, renewer != null? renewer: ugi.getShortUserName());
|
namenode, ugi, renewer != null? renewer: ugi.getShortUserName());
|
||||||
final Token<? extends TokenIdentifier> t = c.getAllTokens().iterator().next();
|
final Token<? extends TokenIdentifier> t = c.getAllTokens().iterator().next();
|
||||||
Text kind = request.getScheme().equals("http") ? WebHdfsFileSystem.TOKEN_KIND : SWebHdfsFileSystem.TOKEN_KIND;
|
Text kind = request.getScheme().equals("http") ? WebHdfsFileSystem.TOKEN_KIND
|
||||||
|
: SWebHdfsFileSystem.TOKEN_KIND;
|
||||||
t.setKind(kind);
|
t.setKind(kind);
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
@ -227,9 +242,8 @@ private URI redirectURI(final NameNode namenode,
|
||||||
final String path, final HttpOpParam.Op op, final long openOffset,
|
final String path, final HttpOpParam.Op op, final long openOffset,
|
||||||
final long blocksize,
|
final long blocksize,
|
||||||
final Param<?, ?>... parameters) throws URISyntaxException, IOException {
|
final Param<?, ?>... parameters) throws URISyntaxException, IOException {
|
||||||
final Configuration conf = (Configuration)context.getAttribute(JspHelper.CURRENT_CONF);
|
|
||||||
final DatanodeInfo dn = chooseDatanode(namenode, path, op, openOffset,
|
final DatanodeInfo dn = chooseDatanode(namenode, path, op, openOffset,
|
||||||
blocksize, conf);
|
blocksize);
|
||||||
|
|
||||||
final String delegationQuery;
|
final String delegationQuery;
|
||||||
if (!UserGroupInformation.isSecurityEnabled()) {
|
if (!UserGroupInformation.isSecurityEnabled()) {
|
||||||
|
|
|
@ -0,0 +1,160 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.io.LimitInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the tool for analyzing file sizes in the namespace image. In order to
|
||||||
|
* run the tool one should define a range of integers <tt>[0, maxSize]</tt> by
|
||||||
|
* specifying <tt>maxSize</tt> and a <tt>step</tt>. The range of integers is
|
||||||
|
* divided into segments of size <tt>step</tt>:
|
||||||
|
* <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>, and the visitor
|
||||||
|
* calculates how many files in the system fall into each segment
|
||||||
|
* <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. Note that files larger than
|
||||||
|
* <tt>maxSize</tt> always fall into the very last segment.
|
||||||
|
*
|
||||||
|
* <h3>Input.</h3>
|
||||||
|
* <ul>
|
||||||
|
* <li><tt>filename</tt> specifies the location of the image file;</li>
|
||||||
|
* <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
|
||||||
|
* sizes considered by the visitor;</li>
|
||||||
|
* <li><tt>step</tt> the range is divided into segments of size step.</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <h3>Output.</h3> The output file is formatted as a tab separated two column
|
||||||
|
* table: Size and NumFiles. Where Size represents the start of the segment, and
|
||||||
|
* numFiles is the number of files form the image which size falls in this
|
||||||
|
* segment.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
final class FileDistributionCalculator {
|
||||||
|
private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
|
||||||
|
private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
|
||||||
|
|
||||||
|
private final Configuration conf;
|
||||||
|
private final long maxSize;
|
||||||
|
private final int steps;
|
||||||
|
private final PrintWriter out;
|
||||||
|
|
||||||
|
private int[] distribution;
|
||||||
|
private int totalFiles;
|
||||||
|
private int totalDirectories;
|
||||||
|
private int totalBlocks;
|
||||||
|
private long totalSpace;
|
||||||
|
private long maxFileSize;
|
||||||
|
|
||||||
|
FileDistributionCalculator(Configuration conf, long maxSize, int steps,
|
||||||
|
PrintWriter out) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize;
|
||||||
|
this.steps = steps == 0 ? INTERVAL_DEFAULT : steps;
|
||||||
|
this.out = out;
|
||||||
|
long numIntervals = this.maxSize / this.steps;
|
||||||
|
this.distribution = new int[1 + (int) (numIntervals)];
|
||||||
|
Preconditions.checkState(numIntervals < Integer.MAX_VALUE,
|
||||||
|
"Too many distribution intervals");
|
||||||
|
}
|
||||||
|
|
||||||
|
void visit(RandomAccessFile file) throws IOException {
|
||||||
|
if (!FSImageUtil.checkFileFormat(file)) {
|
||||||
|
throw new IOException("Unrecognized FSImage");
|
||||||
|
}
|
||||||
|
|
||||||
|
FileSummary summary = FSImageUtil.loadSummary(file);
|
||||||
|
FileInputStream in = null;
|
||||||
|
try {
|
||||||
|
in = new FileInputStream(file.getFD());
|
||||||
|
for (FileSummary.Section s : summary.getSectionsList()) {
|
||||||
|
if (SectionName.fromString(s.getName()) != SectionName.INODE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
in.getChannel().position(s.getOffset());
|
||||||
|
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||||
|
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||||
|
in, s.getLength())));
|
||||||
|
run(is);
|
||||||
|
output();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(null, in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void run(InputStream in) throws IOException {
|
||||||
|
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||||
|
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||||
|
INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
|
||||||
|
if (p.getType() == INodeSection.INode.Type.FILE) {
|
||||||
|
++totalFiles;
|
||||||
|
INodeSection.INodeFile f = p.getFile();
|
||||||
|
totalBlocks += f.getBlocksCount();
|
||||||
|
long fileSize = 0;
|
||||||
|
for (BlockProto b : f.getBlocksList()) {
|
||||||
|
fileSize += b.getNumBytes() * f.getReplication();
|
||||||
|
}
|
||||||
|
maxFileSize = Math.max(fileSize, maxFileSize);
|
||||||
|
totalSpace += fileSize;
|
||||||
|
|
||||||
|
int bucket = fileSize > maxSize ? distribution.length - 1 : (int) Math
|
||||||
|
.ceil((double)fileSize / steps);
|
||||||
|
++distribution[bucket];
|
||||||
|
|
||||||
|
} else if (p.getType() == INodeSection.INode.Type.DIRECTORY) {
|
||||||
|
++totalDirectories;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i % (1 << 20) == 0) {
|
||||||
|
out.println("Processed " + i + " inodes.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void output() {
|
||||||
|
// write the distribution into the output file
|
||||||
|
out.print("Size\tNumFiles\n");
|
||||||
|
for (int i = 0; i < distribution.length; i++) {
|
||||||
|
if (distribution[i] != 0) {
|
||||||
|
out.print(((long) i * steps) + "\t" + distribution[i]);
|
||||||
|
out.print('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.print("totalFiles = " + totalFiles + "\n");
|
||||||
|
out.print("totalDirectories = " + totalDirectories + "\n");
|
||||||
|
out.print("totalBlocks = " + totalBlocks + "\n");
|
||||||
|
out.print("totalSpace = " + totalSpace + "\n");
|
||||||
|
out.print("maxFileSize = " + maxFileSize + "\n");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,250 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.permission.PermissionStatus;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeReferenceSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.INodeId;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import com.google.common.io.LimitInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the tool for analyzing file sizes in the namespace image. In order to
|
||||||
|
* run the tool one should define a range of integers <tt>[0, maxSize]</tt> by
|
||||||
|
* specifying <tt>maxSize</tt> and a <tt>step</tt>. The range of integers is
|
||||||
|
* divided into segments of size <tt>step</tt>:
|
||||||
|
* <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>, and the visitor
|
||||||
|
* calculates how many files in the system fall into each segment
|
||||||
|
* <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. Note that files larger than
|
||||||
|
* <tt>maxSize</tt> always fall into the very last segment.
|
||||||
|
*
|
||||||
|
* <h3>Input.</h3>
|
||||||
|
* <ul>
|
||||||
|
* <li><tt>filename</tt> specifies the location of the image file;</li>
|
||||||
|
* <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
|
||||||
|
* sizes considered by the visitor;</li>
|
||||||
|
* <li><tt>step</tt> the range is divided into segments of size step.</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <h3>Output.</h3> The output file is formatted as a tab separated two column
|
||||||
|
* table: Size and NumFiles. Where Size represents the start of the segment, and
|
||||||
|
* numFiles is the number of files form the image which size falls in this
|
||||||
|
* segment.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
final class LsrPBImage {
|
||||||
|
private final Configuration conf;
|
||||||
|
private final PrintWriter out;
|
||||||
|
private String[] stringTable;
|
||||||
|
private HashMap<Long, INodeSection.INode> inodes = Maps.newHashMap();
|
||||||
|
private HashMap<Long, long[]> dirmap = Maps.newHashMap();
|
||||||
|
private ArrayList<INodeReferenceSection.INodeReference> refList = Lists.newArrayList();
|
||||||
|
|
||||||
|
public LsrPBImage(Configuration conf, PrintWriter out) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.out = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(RandomAccessFile file) throws IOException {
|
||||||
|
if (!FSImageUtil.checkFileFormat(file)) {
|
||||||
|
throw new IOException("Unrecognized FSImage");
|
||||||
|
}
|
||||||
|
|
||||||
|
FileSummary summary = FSImageUtil.loadSummary(file);
|
||||||
|
FileInputStream fin = null;
|
||||||
|
try {
|
||||||
|
fin = new FileInputStream(file.getFD());
|
||||||
|
|
||||||
|
ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
|
||||||
|
.getSectionsList());
|
||||||
|
Collections.sort(sections, new Comparator<FileSummary.Section>() {
|
||||||
|
@Override
|
||||||
|
public int compare(FileSummary.Section s1, FileSummary.Section s2) {
|
||||||
|
SectionName n1 = SectionName.fromString(s1.getName());
|
||||||
|
SectionName n2 = SectionName.fromString(s2.getName());
|
||||||
|
if (n1 == null) {
|
||||||
|
return n2 == null ? 0 : -1;
|
||||||
|
} else if (n2 == null) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return n1.ordinal() - n2.ordinal();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for (FileSummary.Section s : sections) {
|
||||||
|
fin.getChannel().position(s.getOffset());
|
||||||
|
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||||
|
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||||
|
fin, s.getLength())));
|
||||||
|
|
||||||
|
switch (SectionName.fromString(s.getName())) {
|
||||||
|
case STRING_TABLE:
|
||||||
|
loadStringTable(is);
|
||||||
|
break;
|
||||||
|
case INODE:
|
||||||
|
loadINodeSection(is);
|
||||||
|
break;
|
||||||
|
case INODE_REFRENCE:
|
||||||
|
loadINodeReferenceSection(is);
|
||||||
|
break;
|
||||||
|
case INODE_DIR:
|
||||||
|
loadINodeDirectorySection(is);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
list("", INodeId.ROOT_INODE_ID);
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(null, fin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void list(String parent, long dirId) {
|
||||||
|
INode inode = inodes.get(dirId);
|
||||||
|
listINode(parent.isEmpty() ? "/" : parent, inode);
|
||||||
|
long[] children = dirmap.get(dirId);
|
||||||
|
if (children == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
String newParent = parent + inode.getName().toStringUtf8() + "/";
|
||||||
|
for (long cid : children) {
|
||||||
|
list(newParent, cid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void listINode(String parent, INode inode) {
|
||||||
|
switch (inode.getType()) {
|
||||||
|
case FILE: {
|
||||||
|
INodeFile f = inode.getFile();
|
||||||
|
PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
|
||||||
|
f.getPermission(), stringTable);
|
||||||
|
out.print(String.format("-%s %2s %8s %10s %10s %10d %s%s\n", p
|
||||||
|
.getPermission().toString(), f.getReplication(), p.getUserName(), p
|
||||||
|
.getGroupName(), f.getModificationTime(), getFileSize(f), parent,
|
||||||
|
inode.getName().toStringUtf8()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DIRECTORY: {
|
||||||
|
INodeDirectory d = inode.getDirectory();
|
||||||
|
PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
|
||||||
|
d.getPermission(), stringTable);
|
||||||
|
out.print(String.format("d%s - %8s %10s %10s %10d %s%s\n", p
|
||||||
|
.getPermission().toString(), p.getUserName(), p.getGroupName(), d
|
||||||
|
.getModificationTime(), 0, parent, inode.getName().toStringUtf8()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SYMLINK: {
|
||||||
|
INodeSymlink d = inode.getSymlink();
|
||||||
|
PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
|
||||||
|
d.getPermission(), stringTable);
|
||||||
|
out.print(String.format("-%s - %8s %10s %10s %10d %s%s -> %s\n", p
|
||||||
|
.getPermission().toString(), p.getUserName(), p.getGroupName(), 0, 0,
|
||||||
|
parent, inode.getName().toStringUtf8(), d.getTarget().toStringUtf8()));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getFileSize(INodeFile f) {
|
||||||
|
long size = 0;
|
||||||
|
for (BlockProto p : f.getBlocksList()) {
|
||||||
|
size += p.getNumBytes();
|
||||||
|
}
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadINodeDirectorySection(InputStream in) throws IOException {
|
||||||
|
while (true) {
|
||||||
|
INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
// note that in is a LimitedInputStream
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
long[] l = new long[e.getChildrenCount() + e.getRefChildrenCount()];
|
||||||
|
for (int i = 0; i < e.getChildrenCount(); ++i) {
|
||||||
|
l[i] = e.getChildren(i);
|
||||||
|
}
|
||||||
|
for (int i = e.getChildrenCount(); i < l.length; i++) {
|
||||||
|
int refId = e.getRefChildren(i - e.getChildrenCount());
|
||||||
|
l[i] = refList.get(refId).getReferredId();
|
||||||
|
}
|
||||||
|
dirmap.put(e.getParent(), l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadINodeReferenceSection(InputStream in) throws IOException {
|
||||||
|
while (true) {
|
||||||
|
INodeReferenceSection.INodeReference e = INodeReferenceSection
|
||||||
|
.INodeReference.parseDelimitedFrom(in);
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
refList.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadINodeSection(InputStream in) throws IOException {
|
||||||
|
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||||
|
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||||
|
INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
|
||||||
|
inodes.put(p.getId(), p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadStringTable(InputStream in) throws IOException {
|
||||||
|
StringTableSection s = StringTableSection.parseDelimitedFrom(in);
|
||||||
|
stringTable = new String[s.getNumEntry() + 1];
|
||||||
|
for (int i = 0; i < s.getNumEntry(); ++i) {
|
||||||
|
StringTableSection.Entry e = StringTableSection.Entry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
stringTable[e.getId()] = e.getStr();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,178 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
|
||||||
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
import org.apache.commons.cli.CommandLineParser;
|
||||||
|
import org.apache.commons.cli.OptionBuilder;
|
||||||
|
import org.apache.commons.cli.Options;
|
||||||
|
import org.apache.commons.cli.ParseException;
|
||||||
|
import org.apache.commons.cli.PosixParser;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
|
||||||
|
/**
 * OfflineImageViewer to dump the contents of a Hadoop image file to XML or the
 * console. Main entry point into utility, either via the command line or
 * programmatically.
 */
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class OfflineImageViewerPB {
|
||||||
|
public static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class);
|
||||||
|
|
||||||
|
private final static String usage = "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n"
|
||||||
|
+ "Offline Image Viewer\n"
|
||||||
|
+ "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n"
|
||||||
|
+ "saving the results in OUTPUTFILE.\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "The oiv utility will attempt to parse correctly formed image files\n"
|
||||||
|
+ "and will abort fail with mal-formed image files.\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "The tool works offline and does not require a running cluster in\n"
|
||||||
|
+ "order to process an image file.\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "The following image processors are available:\n"
|
||||||
|
+ " * Ls: The default image processor generates an lsr-style listing\n"
|
||||||
|
+ " of the files in the namespace, with the same fields in the same\n"
|
||||||
|
+ " order. Note that in order to correctly determine file sizes,\n"
|
||||||
|
+ " this formatter cannot skip blocks and will override the\n"
|
||||||
|
+ " -skipBlocks option.\n"
|
||||||
|
+ " * XML: This processor creates an XML document with all elements of\n"
|
||||||
|
+ " the fsimage enumerated, suitable for further analysis by XML\n"
|
||||||
|
+ " tools.\n"
|
||||||
|
+ " * FileDistribution: This processor analyzes the file size\n"
|
||||||
|
+ " distribution in the image.\n"
|
||||||
|
+ " -maxSize specifies the range [0, maxSize] of file sizes to be\n"
|
||||||
|
+ " analyzed (128GB by default).\n"
|
||||||
|
+ " -step defines the granularity of the distribution. (2MB by default)\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "Required command line arguments:\n"
|
||||||
|
+ "-i,--inputFile <arg> FSImage file to process.\n"
|
||||||
|
+ "-o,--outputFile <arg> Name of output file. If the specified\n"
|
||||||
|
+ " file exists, it will be overwritten.\n"
|
||||||
|
+ "\n"
|
||||||
|
+ "Optional command line arguments:\n"
|
||||||
|
+ "-p,--processor <arg> Select which type of processor to apply\n"
|
||||||
|
+ " against image file."
|
||||||
|
+ " (Ls|XML|FileDistribution).\n"
|
||||||
|
+ "-h,--help Display usage information and exit\n";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build command-line options and descriptions
|
||||||
|
*/
|
||||||
|
private static Options buildOptions() {
|
||||||
|
Options options = new Options();
|
||||||
|
|
||||||
|
// Build in/output file arguments, which are required, but there is no
|
||||||
|
// addOption method that can specify this
|
||||||
|
OptionBuilder.isRequired();
|
||||||
|
OptionBuilder.hasArgs();
|
||||||
|
OptionBuilder.withLongOpt("outputFile");
|
||||||
|
options.addOption(OptionBuilder.create("o"));
|
||||||
|
|
||||||
|
OptionBuilder.isRequired();
|
||||||
|
OptionBuilder.hasArgs();
|
||||||
|
OptionBuilder.withLongOpt("inputFile");
|
||||||
|
options.addOption(OptionBuilder.create("i"));
|
||||||
|
|
||||||
|
options.addOption("p", "processor", true, "");
|
||||||
|
options.addOption("h", "help", false, "");
|
||||||
|
options.addOption("skipBlocks", false, "");
|
||||||
|
options.addOption("printToScreen", false, "");
|
||||||
|
options.addOption("delimiter", true, "");
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Entry point to command-line-driven operation. User may specify options and
|
||||||
|
* start fsimage viewer from the command line. Program will process image file
|
||||||
|
* and exit cleanly or, if an error is encountered, inform user and exit.
|
||||||
|
*
|
||||||
|
* @param args
|
||||||
|
* Command line options
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
Options options = buildOptions();
|
||||||
|
if (args.length == 0) {
|
||||||
|
printUsage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandLineParser parser = new PosixParser();
|
||||||
|
CommandLine cmd;
|
||||||
|
|
||||||
|
try {
|
||||||
|
cmd = parser.parse(options, args);
|
||||||
|
} catch (ParseException e) {
|
||||||
|
System.out.println("Error parsing command-line options: ");
|
||||||
|
printUsage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd.hasOption("h")) { // print help and exit
|
||||||
|
printUsage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String inputFile = cmd.getOptionValue("i");
|
||||||
|
String processor = cmd.getOptionValue("p", "Ls");
|
||||||
|
String outputFile = cmd.getOptionValue("o");
|
||||||
|
|
||||||
|
PrintWriter out = (outputFile == null || outputFile.equals("-")) ? new PrintWriter(
|
||||||
|
System.out) : new PrintWriter(new File(outputFile));
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
try {
|
||||||
|
if (processor.equals("FileDistribution")) {
|
||||||
|
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
|
||||||
|
int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
|
||||||
|
new FileDistributionCalculator(conf, maxSize, step, out)
|
||||||
|
.visit(new RandomAccessFile(inputFile, "r"));
|
||||||
|
} else if (processor.equals("XML")) {
|
||||||
|
new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile,
|
||||||
|
"r"));
|
||||||
|
} else {
|
||||||
|
new LsrPBImage(conf, out).visit(new RandomAccessFile(inputFile, "r"));
|
||||||
|
}
|
||||||
|
} catch (EOFException e) {
|
||||||
|
System.err.println("Input file ended unexpectedly. Exiting");
|
||||||
|
} catch (IOException e) {
|
||||||
|
System.err.println("Encountered exception. Exiting: " + e.getMessage());
|
||||||
|
} finally {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print application usage instructions.
|
||||||
|
*/
|
||||||
|
private static void printUsage() {
|
||||||
|
System.out.println(usage);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,433 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeReferenceSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.io.LimitInputStream;
|
||||||
|
|
||||||
|
/**
 * PBImageXmlWriter walks a protobuf-based fsimage and emits an XML document
 * describing every section it understands: name-system information, the
 * string table, inodes, inode references, directories, files under
 * construction, snapshots, snapshot diffs, the secret manager and the cache
 * manager. The output is intended for further analysis by XML tools.
 */
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public final class PBImageXmlWriter {
|
||||||
|
private final Configuration conf;
|
||||||
|
private final PrintWriter out;
|
||||||
|
private String[] stringTable;
|
||||||
|
|
||||||
|
public PBImageXmlWriter(Configuration conf, PrintWriter out) {
|
||||||
|
this.conf = conf;
|
||||||
|
this.out = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(RandomAccessFile file) throws IOException {
|
||||||
|
if (!FSImageUtil.checkFileFormat(file)) {
|
||||||
|
throw new IOException("Unrecognized FSImage");
|
||||||
|
}
|
||||||
|
|
||||||
|
FileSummary summary = FSImageUtil.loadSummary(file);
|
||||||
|
FileInputStream fin = null;
|
||||||
|
try {
|
||||||
|
fin = new FileInputStream(file.getFD());
|
||||||
|
out.print("<?xml version=\"1.0\"?>\n");
|
||||||
|
|
||||||
|
ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
|
||||||
|
.getSectionsList());
|
||||||
|
Collections.sort(sections, new Comparator<FileSummary.Section>() {
|
||||||
|
@Override
|
||||||
|
public int compare(FileSummary.Section s1, FileSummary.Section s2) {
|
||||||
|
SectionName n1 = SectionName.fromString(s1.getName());
|
||||||
|
SectionName n2 = SectionName.fromString(s2.getName());
|
||||||
|
if (n1 == null) {
|
||||||
|
return n2 == null ? 0 : -1;
|
||||||
|
} else if (n2 == null) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return n1.ordinal() - n2.ordinal();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for (FileSummary.Section s : sections) {
|
||||||
|
fin.getChannel().position(s.getOffset());
|
||||||
|
InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
|
||||||
|
summary.getCodec(), new BufferedInputStream(new LimitInputStream(
|
||||||
|
fin, s.getLength())));
|
||||||
|
|
||||||
|
switch (SectionName.fromString(s.getName())) {
|
||||||
|
case NS_INFO:
|
||||||
|
dumpNameSection(is);
|
||||||
|
break;
|
||||||
|
case STRING_TABLE:
|
||||||
|
loadStringTable(is);
|
||||||
|
break;
|
||||||
|
case INODE:
|
||||||
|
dumpINodeSection(is);
|
||||||
|
break;
|
||||||
|
case INODE_REFRENCE:
|
||||||
|
dumpINodeReferenceSection(is);
|
||||||
|
break;
|
||||||
|
case INODE_DIR:
|
||||||
|
dumpINodeDirectorySection(is);
|
||||||
|
break;
|
||||||
|
case FILES_UNDERCONSTRUCTION:
|
||||||
|
dumpFileUnderConstructionSection(is);
|
||||||
|
break;
|
||||||
|
case SNAPSHOT:
|
||||||
|
dumpSnapshotSection(is);
|
||||||
|
break;
|
||||||
|
case SNAPSHOT_DIFF:
|
||||||
|
dumpSnapshotDiffSection(is);
|
||||||
|
break;
|
||||||
|
case SECRET_MANAGER:
|
||||||
|
dumpSecretManagerSection(is);
|
||||||
|
break;
|
||||||
|
case CACHE_MANAGER:
|
||||||
|
dumpCacheManagerSection(is);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(null, fin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpCacheManagerSection(InputStream is) throws IOException {
|
||||||
|
out.print("<CacheManagerSection>");
|
||||||
|
CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(is);
|
||||||
|
o("nextDirectiveId", s.getNextDirectiveId());
|
||||||
|
for (int i = 0; i < s.getNumPools(); ++i) {
|
||||||
|
CachePoolInfoProto p = CachePoolInfoProto.parseDelimitedFrom(is);
|
||||||
|
out.print("<pool>");
|
||||||
|
o("poolName", p.getPoolName()).o("ownerName", p.getOwnerName())
|
||||||
|
.o("groupName", p.getGroupName()).o("mode", p.getMode())
|
||||||
|
.o("limit", p.getLimit())
|
||||||
|
.o("maxRelativeExpiry", p.getMaxRelativeExpiry());
|
||||||
|
out.print("</pool>\n");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < s.getNumPools(); ++i) {
|
||||||
|
CacheDirectiveInfoProto p = CacheDirectiveInfoProto
|
||||||
|
.parseDelimitedFrom(is);
|
||||||
|
out.print("<directive>");
|
||||||
|
o("id", p.getId()).o("path", p.getPath())
|
||||||
|
.o("replication", p.getReplication()).o("pool", p.getPool());
|
||||||
|
out.print("<expiration>");
|
||||||
|
CacheDirectiveInfoExpirationProto e = p.getExpiration();
|
||||||
|
o("millis", e.getMillis()).o("relatilve", e.getIsRelative());
|
||||||
|
out.print("</expiration>\n");
|
||||||
|
out.print("</directive>\n");
|
||||||
|
}
|
||||||
|
out.print("</CacheManagerSection>\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpFileUnderConstructionSection(InputStream in)
|
||||||
|
throws IOException {
|
||||||
|
out.print("<FileUnderConstructionSection>");
|
||||||
|
while (true) {
|
||||||
|
FileUnderConstructionEntry e = FileUnderConstructionEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.print("<inode>");
|
||||||
|
o("id", e.getInodeId()).o("path", e.getFullPath());
|
||||||
|
out.print("</inode>\n");
|
||||||
|
}
|
||||||
|
out.print("</FileUnderConstructionSection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeDirectory(INodeDirectory d) {
|
||||||
|
o("mtime", d.getModificationTime()).o("permission",
|
||||||
|
dumpPermission(d.getPermission()));
|
||||||
|
|
||||||
|
if (d.hasDsQuota() && d.hasNsQuota()) {
|
||||||
|
o("nsquota", d.getNsQuota()).o("dsquota", d.getDsQuota());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeDirectorySection(InputStream in) throws IOException {
|
||||||
|
out.print("<INodeDirectorySection>");
|
||||||
|
while (true) {
|
||||||
|
INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
// note that in is a LimitedInputStream
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.print("<directory>");
|
||||||
|
o("parent", e.getParent());
|
||||||
|
for (long id : e.getChildrenList()) {
|
||||||
|
o("inode", id);
|
||||||
|
}
|
||||||
|
for (int refId : e.getRefChildrenList()) {
|
||||||
|
o("inodereference-index", refId);
|
||||||
|
}
|
||||||
|
out.print("</directory>\n");
|
||||||
|
}
|
||||||
|
out.print("</INodeDirectorySection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeReferenceSection(InputStream in) throws IOException {
|
||||||
|
out.print("<INodeReferenceSection>");
|
||||||
|
while (true) {
|
||||||
|
INodeReferenceSection.INodeReference e = INodeReferenceSection
|
||||||
|
.INodeReference.parseDelimitedFrom(in);
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
dumpINodeReference(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeReference(INodeReferenceSection.INodeReference r) {
|
||||||
|
out.print("<ref>");
|
||||||
|
o("referredId", r.getReferredId()).o("name", r.getName().toStringUtf8())
|
||||||
|
.o("dstSnapshotId", r.getDstSnapshotId())
|
||||||
|
.o("lastSnapshotId", r.getLastSnapshotId());
|
||||||
|
out.print("</ref>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeFile(INodeSection.INodeFile f) {
|
||||||
|
o("replication", f.getReplication()).o("mtime", f.getModificationTime())
|
||||||
|
.o("atime", f.getAccessTime())
|
||||||
|
.o("perferredBlockSize", f.getPreferredBlockSize())
|
||||||
|
.o("permission", dumpPermission(f.getPermission()));
|
||||||
|
|
||||||
|
if (f.getBlocksCount() > 0) {
|
||||||
|
out.print("<blocks>");
|
||||||
|
for (BlockProto b : f.getBlocksList()) {
|
||||||
|
out.print("<block>");
|
||||||
|
o("id", b.getBlockId()).o("genstamp", b.getGenStamp()).o("numBytes",
|
||||||
|
b.getNumBytes());
|
||||||
|
out.print("</block>\n");
|
||||||
|
}
|
||||||
|
out.print("</blocks>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (f.hasFileUC()) {
|
||||||
|
INodeSection.FileUnderConstructionFeature u = f.getFileUC();
|
||||||
|
out.print("<file-under-construction>");
|
||||||
|
o("clientName", u.getClientName()).o("clientMachine",
|
||||||
|
u.getClientMachine());
|
||||||
|
out.print("</file-under-construction>\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeSection(InputStream in) throws IOException {
|
||||||
|
INodeSection s = INodeSection.parseDelimitedFrom(in);
|
||||||
|
out.print("<INodeSection>");
|
||||||
|
o("lastInodeId", s.getLastInodeId());
|
||||||
|
for (int i = 0; i < s.getNumInodes(); ++i) {
|
||||||
|
INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
|
||||||
|
out.print("<inode>");
|
||||||
|
o("id", p.getId()).o("type", p.getType()).o("name",
|
||||||
|
p.getName().toStringUtf8());
|
||||||
|
|
||||||
|
if (p.hasFile()) {
|
||||||
|
dumpINodeFile(p.getFile());
|
||||||
|
} else if (p.hasDirectory()) {
|
||||||
|
dumpINodeDirectory(p.getDirectory());
|
||||||
|
} else if (p.hasSymlink()) {
|
||||||
|
dumpINodeSymlink(p.getSymlink());
|
||||||
|
}
|
||||||
|
|
||||||
|
out.print("</inode>\n");
|
||||||
|
}
|
||||||
|
out.print("</INodeSection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpINodeSymlink(INodeSymlink s) {
|
||||||
|
o("permission", dumpPermission(s.getPermission())).o("target",
|
||||||
|
s.getTarget().toStringUtf8());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpNameSection(InputStream in) throws IOException {
|
||||||
|
NameSystemSection s = NameSystemSection.parseDelimitedFrom(in);
|
||||||
|
out.print("<NameSection>\n");
|
||||||
|
o("genstampV1", s.getGenstampV1()).o("genstampV2", s.getGenstampV2())
|
||||||
|
.o("genstampV1Limit", s.getGenstampV1Limit())
|
||||||
|
.o("lastAllocatedBlockId", s.getLastAllocatedBlockId())
|
||||||
|
.o("txid", s.getTransactionId());
|
||||||
|
out.print("<NameSection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String dumpPermission(long permission) {
|
||||||
|
return FSImageFormatPBINode.Loader.loadPermission(permission, stringTable)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpSecretManagerSection(InputStream is) throws IOException {
|
||||||
|
out.print("<SecretManagerSection>");
|
||||||
|
SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(is);
|
||||||
|
o("currentId", s.getCurrentId()).o("tokenSequenceNumber",
|
||||||
|
s.getTokenSequenceNumber());
|
||||||
|
out.print("</SecretManagerSection>");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpSnapshotDiffSection(InputStream in) throws IOException {
|
||||||
|
out.print("<SnapshotDiffSection>");
|
||||||
|
while (true) {
|
||||||
|
SnapshotDiffSection.DiffEntry e = SnapshotDiffSection.DiffEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
if (e == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.print("<diff>");
|
||||||
|
o("inodeid", e.getInodeId());
|
||||||
|
switch (e.getType()) {
|
||||||
|
case FILEDIFF: {
|
||||||
|
for (int i = 0; i < e.getNumOfDiff(); ++i) {
|
||||||
|
out.print("<filediff>");
|
||||||
|
SnapshotDiffSection.FileDiff f = SnapshotDiffSection.FileDiff
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
o("snapshotId", f.getSnapshotId()).o("size", f.getFileSize()).o(
|
||||||
|
"name", f.getName().toStringUtf8());
|
||||||
|
out.print("</filediff>\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DIRECTORYDIFF: {
|
||||||
|
for (int i = 0; i < e.getNumOfDiff(); ++i) {
|
||||||
|
out.print("<dirdiff>");
|
||||||
|
SnapshotDiffSection.DirectoryDiff d = SnapshotDiffSection.DirectoryDiff
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
o("snapshotId", d.getSnapshotId())
|
||||||
|
.o("isSnapshotroot", d.getIsSnapshotRoot())
|
||||||
|
.o("childrenSize", d.getChildrenSize())
|
||||||
|
.o("name", d.getName().toStringUtf8());
|
||||||
|
|
||||||
|
for (int j = 0; j < d.getCreatedListSize(); ++j) {
|
||||||
|
SnapshotDiffSection.CreatedListEntry ce = SnapshotDiffSection.CreatedListEntry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
out.print("<created>");
|
||||||
|
o("name", ce.getName().toStringUtf8());
|
||||||
|
out.print("</created>\n");
|
||||||
|
}
|
||||||
|
for (long did : d.getDeletedINodeList()) {
|
||||||
|
out.print("<deleted>");
|
||||||
|
o("inode", did);
|
||||||
|
out.print("</deleted>\n");
|
||||||
|
}
|
||||||
|
for (int dRefid : d.getDeletedINodeRefList()) {
|
||||||
|
out.print("<deleted>");
|
||||||
|
o("inodereference-index", dRefid);
|
||||||
|
out.print("</deleted>\n");
|
||||||
|
}
|
||||||
|
out.print("</dirdiff>\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.print("</diff>");
|
||||||
|
}
|
||||||
|
out.print("<SnapshotDiffSection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpSnapshotSection(InputStream in) throws IOException {
|
||||||
|
out.print("<SnapshotSection>");
|
||||||
|
SnapshotSection s = SnapshotSection.parseDelimitedFrom(in);
|
||||||
|
o("snapshotCounter", s.getSnapshotCounter());
|
||||||
|
if (s.getSnapshottableDirCount() > 0) {
|
||||||
|
out.print("<snapshottableDir>");
|
||||||
|
for (long id : s.getSnapshottableDirList()) {
|
||||||
|
o("dir", id);
|
||||||
|
}
|
||||||
|
out.print("</snapshottableDir>\n");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < s.getNumSnapshots(); ++i) {
|
||||||
|
SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
o("snapshot", pbs.getSnapshotId());
|
||||||
|
}
|
||||||
|
out.print("</SnapshotSection>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void loadStringTable(InputStream in) throws IOException {
|
||||||
|
StringTableSection s = StringTableSection.parseDelimitedFrom(in);
|
||||||
|
stringTable = new String[s.getNumEntry() + 1];
|
||||||
|
for (int i = 0; i < s.getNumEntry(); ++i) {
|
||||||
|
StringTableSection.Entry e = StringTableSection.Entry
|
||||||
|
.parseDelimitedFrom(in);
|
||||||
|
stringTable[e.getId()] = e.getStr();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private PBImageXmlWriter o(final String e, final Object v) {
|
||||||
|
out.print("<" + e + ">" + v + "</" + e + ">");
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,284 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
option java_package = "org.apache.hadoop.hdfs.server.namenode";
|
||||||
|
option java_outer_classname = "FsImageProto";
|
||||||
|
|
||||||
|
package hadoop.hdfs.fsimage;
|
||||||
|
|
||||||
|
import "hdfs.proto";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This file defines the on-disk layout of the file system image. The
|
||||||
|
* layout is defined by the following EBNF grammar, in which angle
|
||||||
|
* brackets mark protobuf definitions. (e.g., <FileSummary>)
|
||||||
|
*
|
||||||
|
* FILE := MAGIC SECTION* <FileSummary> FileSummaryLength
|
||||||
|
* MAGIC := 'HDFSIMG1'
|
||||||
|
* SECTION := <NameSystemSection> | ...
|
||||||
|
* FileSummaryLength := 4 byte int
|
||||||
|
*
|
||||||
|
* Some notes:
|
||||||
|
*
|
||||||
|
* The codec field in FileSummary describes the compression codec used
|
||||||
|
* for all sections. The fileheader is always uncompressed.
|
||||||
|
*
|
||||||
|
* All protobuf messages are serialized in delimited form, which means
|
||||||
|
* that there always will be an integer indicates the size of the
|
||||||
|
* protobuf message.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
message FileSummary {
|
||||||
|
// The version of the above EBNF grammars.
|
||||||
|
required uint32 ondiskVersion = 1;
|
||||||
|
// layoutVersion describes which features are available in the
|
||||||
|
// FSImage.
|
||||||
|
required uint32 layoutVersion = 2;
|
||||||
|
optional string codec = 3;
|
||||||
|
// index for each section
|
||||||
|
message Section {
|
||||||
|
optional string name = 1;
|
||||||
|
optional uint64 length = 2;
|
||||||
|
optional uint64 offset = 3;
|
||||||
|
}
|
||||||
|
repeated Section sections = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Name: NS_INFO
|
||||||
|
*/
|
||||||
|
message NameSystemSection {
|
||||||
|
optional uint32 namespaceId = 1;
|
||||||
|
optional uint64 genstampV1 = 2;
|
||||||
|
optional uint64 genstampV2 = 3;
|
||||||
|
optional uint64 genstampV1Limit = 4;
|
||||||
|
optional uint64 lastAllocatedBlockId = 5;
|
||||||
|
optional uint64 transactionId = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) (in Big Endian).
|
||||||
|
* The first and the second parts are the string ids of the user and
|
||||||
|
* group name, and the last 16 bits are the permission bits.
|
||||||
|
*
|
||||||
|
* Name: INODE
|
||||||
|
*/
|
||||||
|
message INodeSection {
|
||||||
|
/**
|
||||||
|
* under-construction feature for INodeFile
|
||||||
|
*/
|
||||||
|
message FileUnderConstructionFeature {
|
||||||
|
optional string clientName = 1;
|
||||||
|
optional string clientMachine = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message INodeFile {
|
||||||
|
optional uint32 replication = 1;
|
||||||
|
optional uint64 modificationTime = 2;
|
||||||
|
optional uint64 accessTime = 3;
|
||||||
|
optional uint64 preferredBlockSize = 4;
|
||||||
|
optional fixed64 permission = 5;
|
||||||
|
repeated BlockProto blocks = 6;
|
||||||
|
optional FileUnderConstructionFeature fileUC = 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
message INodeDirectory {
|
||||||
|
optional uint64 modificationTime = 1;
|
||||||
|
// namespace quota
|
||||||
|
optional uint64 nsQuota = 2;
|
||||||
|
// diskspace quota
|
||||||
|
optional uint64 dsQuota = 3;
|
||||||
|
optional fixed64 permission = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message INodeSymlink {
|
||||||
|
optional fixed64 permission = 1;
|
||||||
|
optional bytes target = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message INode {
|
||||||
|
enum Type {
|
||||||
|
FILE = 1;
|
||||||
|
DIRECTORY = 2;
|
||||||
|
SYMLINK = 3;
|
||||||
|
};
|
||||||
|
required Type type = 1;
|
||||||
|
required uint64 id = 2;
|
||||||
|
optional bytes name = 3;
|
||||||
|
|
||||||
|
optional INodeFile file = 4;
|
||||||
|
optional INodeDirectory directory = 5;
|
||||||
|
optional INodeSymlink symlink = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
optional uint64 lastInodeId = 1;
|
||||||
|
optional uint64 numInodes = 2;
|
||||||
|
// repeated INodes..
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This section records information about under-construction files for
|
||||||
|
* reconstructing the lease map.
|
||||||
|
* NAME: FILES_UNDERCONSTRUCTION
|
||||||
|
*/
|
||||||
|
message FilesUnderConstructionSection {
|
||||||
|
message FileUnderConstructionEntry {
|
||||||
|
optional uint64 inodeId = 1;
|
||||||
|
optional string fullPath = 2;
|
||||||
|
}
|
||||||
|
// repeated FileUnderConstructionEntry...
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This section records the children of each directories
|
||||||
|
* NAME: INODE_DIR
|
||||||
|
*/
|
||||||
|
message INodeDirectorySection {
|
||||||
|
message DirEntry {
|
||||||
|
optional uint64 parent = 1;
|
||||||
|
// children that are not reference nodes
|
||||||
|
repeated uint64 children = 2 [packed = true];
|
||||||
|
// children that are reference nodes, each element is a reference node id
|
||||||
|
repeated uint32 refChildren = 3 [packed = true];
|
||||||
|
}
|
||||||
|
// repeated DirEntry, ended at the boundary of the section.
|
||||||
|
}
|
||||||
|
|
||||||
|
message INodeReferenceSection {
|
||||||
|
message INodeReference {
|
||||||
|
// id of the referred inode
|
||||||
|
optional uint64 referredId = 1;
|
||||||
|
// local name recorded in WithName
|
||||||
|
optional bytes name = 2;
|
||||||
|
// recorded in DstReference
|
||||||
|
optional uint32 dstSnapshotId = 3;
|
||||||
|
// recorded in WithName
|
||||||
|
optional uint32 lastSnapshotId = 4;
|
||||||
|
}
|
||||||
|
// repeated INodeReference...
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This section records the information about snapshot
|
||||||
|
* NAME: SNAPSHOT
|
||||||
|
*/
|
||||||
|
message SnapshotSection {
|
||||||
|
message Snapshot {
|
||||||
|
optional uint32 snapshotId = 1;
|
||||||
|
// Snapshot root
|
||||||
|
optional INodeSection.INode root = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
optional uint32 snapshotCounter = 1;
|
||||||
|
repeated uint64 snapshottableDir = 2 [packed = true];
|
||||||
|
// total number of snapshots
|
||||||
|
optional uint32 numSnapshots = 3;
|
||||||
|
// repeated Snapshot...
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This section records information about snapshot diffs
|
||||||
|
* NAME: SNAPSHOT_DIFF
|
||||||
|
*/
|
||||||
|
message SnapshotDiffSection {
|
||||||
|
message CreatedListEntry {
|
||||||
|
optional bytes name = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DirectoryDiff {
|
||||||
|
optional uint32 snapshotId = 1;
|
||||||
|
optional uint32 childrenSize = 2;
|
||||||
|
optional bool isSnapshotRoot = 3;
|
||||||
|
optional bytes name = 4;
|
||||||
|
optional INodeSection.INodeDirectory snapshotCopy = 5;
|
||||||
|
optional uint32 createdListSize = 6;
|
||||||
|
repeated uint64 deletedINode = 7 [packed = true]; // id of deleted inodes
|
||||||
|
// id of reference nodes in the deleted list
|
||||||
|
repeated uint32 deletedINodeRef = 8 [packed = true];
|
||||||
|
// repeated CreatedListEntry (size is specified by createdListSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
message FileDiff {
|
||||||
|
optional uint32 snapshotId = 1;
|
||||||
|
optional uint64 fileSize = 2;
|
||||||
|
optional bytes name = 3;
|
||||||
|
optional INodeSection.INodeFile snapshotCopy = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DiffEntry {
|
||||||
|
enum Type {
|
||||||
|
FILEDIFF = 1;
|
||||||
|
DIRECTORYDIFF = 2;
|
||||||
|
}
|
||||||
|
required Type type = 1;
|
||||||
|
optional uint64 inodeId = 2;
|
||||||
|
optional uint32 numOfDiff = 3;
|
||||||
|
|
||||||
|
// repeated DirectoryDiff or FileDiff
|
||||||
|
}
|
||||||
|
|
||||||
|
// repeated DiffEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This section maps string to id
|
||||||
|
* NAME: STRING_TABLE
|
||||||
|
*/
|
||||||
|
message StringTableSection {
|
||||||
|
message Entry {
|
||||||
|
optional uint32 id = 1;
|
||||||
|
optional string str = 2;
|
||||||
|
}
|
||||||
|
optional uint32 numEntry = 1;
|
||||||
|
// repeated Entry
|
||||||
|
}
|
||||||
|
|
||||||
|
message SecretManagerSection {
|
||||||
|
message DelegationKey {
|
||||||
|
optional uint32 id = 1;
|
||||||
|
optional uint64 expiryDate = 2;
|
||||||
|
optional bytes key = 3;
|
||||||
|
}
|
||||||
|
message PersistToken {
|
||||||
|
optional uint32 version = 1;
|
||||||
|
optional string owner = 2;
|
||||||
|
optional string renewer = 3;
|
||||||
|
optional string realUser = 4;
|
||||||
|
optional uint64 issueDate = 5;
|
||||||
|
optional uint64 maxDate = 6;
|
||||||
|
optional uint32 sequenceNumber = 7;
|
||||||
|
optional uint32 masterKeyId = 8;
|
||||||
|
optional uint64 expiryDate = 9;
|
||||||
|
}
|
||||||
|
optional uint32 currentId = 1;
|
||||||
|
optional uint32 tokenSequenceNumber = 2;
|
||||||
|
optional uint32 numKeys = 3;
|
||||||
|
optional uint32 numTokens = 4;
|
||||||
|
// repeated DelegationKey keys
|
||||||
|
// repeated PersistToken tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
message CacheManagerSection {
|
||||||
|
required uint64 nextDirectiveId = 1;
|
||||||
|
required uint32 numPools = 2;
|
||||||
|
required uint32 numDirectives = 3;
|
||||||
|
// repeated CachePoolInfoProto pools
|
||||||
|
// repeated CacheDirectiveInfoProto directives
|
||||||
|
}
|
||||||
|
|
|
@ -96,6 +96,14 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.secondary.https-address</name>
|
||||||
|
<value>0.0.0.0:50091</value>
|
||||||
|
<description>
|
||||||
|
The secondary namenode HTTPS server address and port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.datanode.address</name>
|
<name>dfs.datanode.address</name>
|
||||||
<value>0.0.0.0:50010</value>
|
<value>0.0.0.0:50010</value>
|
||||||
|
@ -161,6 +169,16 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.cached.conn.retry</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>The number of times the HDFS client will pull a socket from the
|
||||||
|
cache. Once this number is exceeded, the client will try to create a new
|
||||||
|
socket.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.https.server.keystore.resource</name>
|
<name>dfs.https.server.keystore.resource</name>
|
||||||
<value>ssl-server.xml</value>
|
<value>ssl-server.xml</value>
|
||||||
|
@ -1300,7 +1318,16 @@
|
||||||
<name>dfs.journalnode.http-address</name>
|
<name>dfs.journalnode.http-address</name>
|
||||||
<value>0.0.0.0:8480</value>
|
<value>0.0.0.0:8480</value>
|
||||||
<description>
|
<description>
|
||||||
The address and port the JournalNode web UI listens on.
|
The address and port the JournalNode HTTP server listens on.
|
||||||
|
If the port is 0 then the server will start on a free port.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.journalnode.https-address</name>
|
||||||
|
<value>0.0.0.0:8481</value>
|
||||||
|
<description>
|
||||||
|
The address and port the JournalNode HTTPS server listens on.
|
||||||
If the port is 0 then the server will start on a free port.
|
If the port is 0 then the server will start on a free port.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
@ -1489,6 +1516,26 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.mmap.retry.timeout.ms</name>
|
||||||
|
<value>300000</value>
|
||||||
|
<description>
|
||||||
|
The minimum amount of time that we will wait before retrying a failed mmap
|
||||||
|
operation.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.short.circuit.replica.stale.threshold.ms</name>
|
||||||
|
<value>3000000</value>
|
||||||
|
<description>
|
||||||
|
The maximum amount of time that we will consider a short-circuit replica to
|
||||||
|
be valid, if there is no communication from the DataNode. After this time
|
||||||
|
has elapsed, we will re-fetch the short-circuit replica even if it is in
|
||||||
|
the cache.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
|
<name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
|
||||||
<value>0.25</value>
|
<value>0.25</value>
|
||||||
|
@ -1618,4 +1665,15 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.client.context</name>
|
||||||
|
<value>default</value>
|
||||||
|
<description>
|
||||||
|
The name of the DFSClient context that we should use. Clients that share
|
||||||
|
a context share a socket cache and short-circuit cache, among other things.
|
||||||
|
You should only change this if you don't want to share with another set of
|
||||||
|
threads.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<!--
|
<!--
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -14,8 +16,6 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
-->
|
-->
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
<head>
|
<head>
|
||||||
<link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
|
<link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
{"name": "nn", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo"},
|
{"name": "nn", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo"},
|
||||||
{"name": "nnstat", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"},
|
{"name": "nnstat", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"},
|
||||||
{"name": "fs", "url": "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystemState"},
|
{"name": "fs", "url": "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystemState"},
|
||||||
{"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"},
|
{"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"}
|
||||||
];
|
];
|
||||||
|
|
||||||
var HELPERS = {
|
var HELPERS = {
|
||||||
|
@ -166,14 +166,29 @@
|
||||||
|
|
||||||
$('#ui-tabs a[href="#tab-snapshot"]').click(load_snapshot_info);
|
$('#ui-tabs a[href="#tab-snapshot"]').click(load_snapshot_info);
|
||||||
|
|
||||||
var hash = window.location.hash;
|
function load_page() {
|
||||||
if (hash === "#tab-datanode") {
|
var hash = window.location.hash;
|
||||||
load_datanode_info();
|
switch(hash) {
|
||||||
} else if (hash === "#tab-snapshot") {
|
case "#tab-datanode":
|
||||||
load_snapshot_info();
|
load_datanode_info();
|
||||||
} else if (hash === "#tab-startup-progress") {
|
break;
|
||||||
load_startup_progress();
|
case "#tab-snapshot":
|
||||||
} else {
|
load_snapshot_info();
|
||||||
load_overview();
|
break;
|
||||||
|
case "#tab-startup-progress":
|
||||||
|
load_startup_progress();
|
||||||
|
break;
|
||||||
|
case "#tab-overview":
|
||||||
|
load_overview();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
window.location.hash = "tab-overview";
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
load_page();
|
||||||
|
|
||||||
|
$(window).bind('hashchange', function () {
|
||||||
|
load_page();
|
||||||
|
});
|
||||||
})();
|
})();
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<!--
|
<!--
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -14,8 +16,6 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
-->
|
-->
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
<head>
|
<head>
|
||||||
<link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
|
<link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
|
||||||
|
|
|
@ -35,8 +35,8 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sticky) {
|
if (sticky) {
|
||||||
var exec = ((parms.perm % 10) & 1) == 1;
|
var otherExec = ((ctx.current().permission % 10) & 1) == 1;
|
||||||
res[res.length - 1] = exec ? 't' : 'T';
|
res = res.substr(0, res.length - 1) + (otherExec ? 't' : 'T');
|
||||||
}
|
}
|
||||||
|
|
||||||
chunk.write(dir + res);
|
chunk.write(dir + res);
|
||||||
|
@ -52,6 +52,18 @@
|
||||||
$('#alert-panel').show();
|
$('#alert-panel').show();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$(window).bind('hashchange', function () {
|
||||||
|
$('#alert-panel').hide();
|
||||||
|
|
||||||
|
var dir = window.location.hash.slice(1);
|
||||||
|
if(dir == "") {
|
||||||
|
dir = "/";
|
||||||
|
}
|
||||||
|
if(current_directory != dir) {
|
||||||
|
browse_directory(dir);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
function network_error_handler(url) {
|
function network_error_handler(url) {
|
||||||
return function (jqxhr, text, err) {
|
return function (jqxhr, text, err) {
|
||||||
var msg = '<p>Failed to retreive data from ' + url + ', cause: ' + err + '</p>';
|
var msg = '<p>Failed to retreive data from ' + url + ', cause: ' + err + '</p>';
|
||||||
|
@ -145,6 +157,7 @@
|
||||||
|
|
||||||
current_directory = dir;
|
current_directory = dir;
|
||||||
$('#directory').val(dir);
|
$('#directory').val(dir);
|
||||||
|
window.location.hash = dir;
|
||||||
dust.render('explorer', base.push(d), function(err, out) {
|
dust.render('explorer', base.push(d), function(err, out) {
|
||||||
$('#panel').html(out);
|
$('#panel').html(out);
|
||||||
|
|
||||||
|
@ -169,7 +182,12 @@
|
||||||
|
|
||||||
var b = function() { browse_directory($('#directory').val()); };
|
var b = function() { browse_directory($('#directory').val()); };
|
||||||
$('#btn-nav-directory').click(b);
|
$('#btn-nav-directory').click(b);
|
||||||
browse_directory('/');
|
var dir = window.location.hash.slice(1);
|
||||||
|
if(dir == "") {
|
||||||
|
window.location.hash = "/";
|
||||||
|
} else {
|
||||||
|
browse_directory(dir);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
init();
|
init();
|
||||||
|
|
|
@ -28,32 +28,39 @@
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.commons.lang.SystemUtils;
|
import org.apache.commons.lang.SystemUtils;
|
||||||
|
import org.apache.commons.lang.mutable.MutableBoolean;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.ExtendedBlockId;
|
||||||
|
import org.apache.hadoop.hdfs.ClientContext;
|
||||||
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmap;
|
|
||||||
import org.apache.hadoop.hdfs.client.ClientMmapManager;
|
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache.CacheVisitor;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.io.ByteBufferPool;
|
import org.apache.hadoop.io.ByteBufferPool;
|
||||||
import org.apache.hadoop.io.ElasticByteBufferPool;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||||
import org.apache.hadoop.net.unix.DomainSocket;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.base.Supplier;
|
import com.google.common.base.Supplier;
|
||||||
|
|
||||||
|
@ -250,17 +257,39 @@ public void testZeroCopyReadsNoFallback() throws Exception {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CountingVisitor
|
private static class CountingVisitor implements CacheVisitor {
|
||||||
implements ClientMmapManager.ClientMmapVisitor {
|
private final int expectedNumOutstandingMmaps;
|
||||||
int count = 0;
|
private final int expectedNumReplicas;
|
||||||
|
private final int expectedNumEvictable;
|
||||||
|
private final int expectedNumMmapedEvictable;
|
||||||
|
|
||||||
@Override
|
CountingVisitor(int expectedNumOutstandingMmaps,
|
||||||
public void accept(ClientMmap mmap) {
|
int expectedNumReplicas, int expectedNumEvictable,
|
||||||
count++;
|
int expectedNumMmapedEvictable) {
|
||||||
|
this.expectedNumOutstandingMmaps = expectedNumOutstandingMmaps;
|
||||||
|
this.expectedNumReplicas = expectedNumReplicas;
|
||||||
|
this.expectedNumEvictable = expectedNumEvictable;
|
||||||
|
this.expectedNumMmapedEvictable = expectedNumMmapedEvictable;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
@Override
|
||||||
count = 0;
|
public void visit(int numOutstandingMmaps,
|
||||||
|
Map<ExtendedBlockId, ShortCircuitReplica> replicas,
|
||||||
|
Map<ExtendedBlockId, InvalidToken> failedLoads,
|
||||||
|
Map<Long, ShortCircuitReplica> evictable,
|
||||||
|
Map<Long, ShortCircuitReplica> evictableMmapped) {
|
||||||
|
if (expectedNumOutstandingMmaps >= 0) {
|
||||||
|
Assert.assertEquals(expectedNumOutstandingMmaps, numOutstandingMmaps);
|
||||||
|
}
|
||||||
|
if (expectedNumReplicas >= 0) {
|
||||||
|
Assert.assertEquals(expectedNumReplicas, replicas.size());
|
||||||
|
}
|
||||||
|
if (expectedNumEvictable >= 0) {
|
||||||
|
Assert.assertEquals(expectedNumEvictable, evictable.size());
|
||||||
|
}
|
||||||
|
if (expectedNumMmapedEvictable >= 0) {
|
||||||
|
Assert.assertEquals(expectedNumMmapedEvictable, evictableMmapped.size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,105 +300,98 @@ public void testZeroCopyMmapCache() throws Exception {
|
||||||
final Path TEST_PATH = new Path("/a");
|
final Path TEST_PATH = new Path("/a");
|
||||||
final int TEST_FILE_LENGTH = 16385;
|
final int TEST_FILE_LENGTH = 16385;
|
||||||
final int RANDOM_SEED = 23453;
|
final int RANDOM_SEED = 23453;
|
||||||
|
final String CONTEXT = "testZeroCopyMmapCacheContext";
|
||||||
FSDataInputStream fsIn = null;
|
FSDataInputStream fsIn = null;
|
||||||
ByteBuffer results[] = { null, null, null, null, null };
|
ByteBuffer results[] = { null, null, null, null };
|
||||||
|
|
||||||
DistributedFileSystem fs = null;
|
DistributedFileSystem fs = null;
|
||||||
|
conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
DFSTestUtil.createFile(fs, TEST_PATH,
|
||||||
|
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
|
||||||
try {
|
try {
|
||||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
|
||||||
cluster.waitActive();
|
} catch (InterruptedException e) {
|
||||||
fs = cluster.getFileSystem();
|
Assert.fail("unexpected InterruptedException during " +
|
||||||
DFSTestUtil.createFile(fs, TEST_PATH,
|
"waitReplication: " + e);
|
||||||
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
|
} catch (TimeoutException e) {
|
||||||
try {
|
Assert.fail("unexpected TimeoutException during " +
|
||||||
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
|
"waitReplication: " + e);
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Assert.fail("unexpected InterruptedException during " +
|
|
||||||
"waitReplication: " + e);
|
|
||||||
} catch (TimeoutException e) {
|
|
||||||
Assert.fail("unexpected TimeoutException during " +
|
|
||||||
"waitReplication: " + e);
|
|
||||||
}
|
|
||||||
fsIn = fs.open(TEST_PATH);
|
|
||||||
byte original[] = new byte[TEST_FILE_LENGTH];
|
|
||||||
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
|
|
||||||
fsIn.close();
|
|
||||||
fsIn = fs.open(TEST_PATH);
|
|
||||||
final ClientMmapManager mmapManager = fs.getClient().getMmapManager();
|
|
||||||
final CountingVisitor countingVisitor = new CountingVisitor();
|
|
||||||
mmapManager.visitMmaps(countingVisitor);
|
|
||||||
Assert.assertEquals(0, countingVisitor.count);
|
|
||||||
mmapManager.visitEvictable(countingVisitor);
|
|
||||||
Assert.assertEquals(0, countingVisitor.count);
|
|
||||||
results[0] = fsIn.read(null, 4096,
|
|
||||||
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
|
||||||
fsIn.seek(0);
|
|
||||||
results[1] = fsIn.read(null, 4096,
|
|
||||||
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
|
||||||
mmapManager.visitMmaps(countingVisitor);
|
|
||||||
Assert.assertEquals(1, countingVisitor.count);
|
|
||||||
countingVisitor.reset();
|
|
||||||
mmapManager.visitEvictable(countingVisitor);
|
|
||||||
Assert.assertEquals(0, countingVisitor.count);
|
|
||||||
countingVisitor.reset();
|
|
||||||
|
|
||||||
// The mmaps should be of the first block of the file.
|
|
||||||
final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
|
|
||||||
mmapManager.visitMmaps(new ClientMmapManager.ClientMmapVisitor() {
|
|
||||||
@Override
|
|
||||||
public void accept(ClientMmap mmap) {
|
|
||||||
Assert.assertEquals(firstBlock, mmap.getBlock());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Read more blocks.
|
|
||||||
results[2] = fsIn.read(null, 4096,
|
|
||||||
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
|
||||||
results[3] = fsIn.read(null, 4096,
|
|
||||||
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
|
||||||
try {
|
|
||||||
results[4] = fsIn.read(null, 4096,
|
|
||||||
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
|
||||||
Assert.fail("expected UnsupportedOperationException");
|
|
||||||
} catch (UnsupportedOperationException e) {
|
|
||||||
// expected
|
|
||||||
}
|
|
||||||
|
|
||||||
// we should have 3 mmaps, 0 evictable
|
|
||||||
mmapManager.visitMmaps(countingVisitor);
|
|
||||||
Assert.assertEquals(3, countingVisitor.count);
|
|
||||||
countingVisitor.reset();
|
|
||||||
mmapManager.visitEvictable(countingVisitor);
|
|
||||||
Assert.assertEquals(0, countingVisitor.count);
|
|
||||||
|
|
||||||
// After we close the cursors, the mmaps should be evictable for
|
|
||||||
// a brief period of time. Then, they should be closed (we're
|
|
||||||
// using a very quick timeout)
|
|
||||||
for (ByteBuffer buffer : results) {
|
|
||||||
if (buffer != null) {
|
|
||||||
fsIn.releaseBuffer(buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
public Boolean get() {
|
|
||||||
countingVisitor.reset();
|
|
||||||
try {
|
|
||||||
mmapManager.visitEvictable(countingVisitor);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return (0 == countingVisitor.count);
|
|
||||||
}
|
|
||||||
}, 10, 10000);
|
|
||||||
countingVisitor.reset();
|
|
||||||
mmapManager.visitMmaps(countingVisitor);
|
|
||||||
Assert.assertEquals(0, countingVisitor.count);
|
|
||||||
} finally {
|
|
||||||
if (fsIn != null) fsIn.close();
|
|
||||||
if (fs != null) fs.close();
|
|
||||||
if (cluster != null) cluster.shutdown();
|
|
||||||
}
|
}
|
||||||
|
fsIn = fs.open(TEST_PATH);
|
||||||
|
byte original[] = new byte[TEST_FILE_LENGTH];
|
||||||
|
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
|
||||||
|
fsIn.close();
|
||||||
|
fsIn = fs.open(TEST_PATH);
|
||||||
|
final ShortCircuitCache cache = ClientContext.get(
|
||||||
|
CONTEXT, new DFSClient.Conf(conf)). getShortCircuitCache();
|
||||||
|
cache.accept(new CountingVisitor(0, 5, 5, 0));
|
||||||
|
results[0] = fsIn.read(null, 4096,
|
||||||
|
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
||||||
|
fsIn.seek(0);
|
||||||
|
results[1] = fsIn.read(null, 4096,
|
||||||
|
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
||||||
|
|
||||||
|
// The mmap should be of the first block of the file.
|
||||||
|
final ExtendedBlock firstBlock =
|
||||||
|
DFSTestUtil.getFirstBlock(fs, TEST_PATH);
|
||||||
|
cache.accept(new CacheVisitor() {
|
||||||
|
@Override
|
||||||
|
public void visit(int numOutstandingMmaps,
|
||||||
|
Map<ExtendedBlockId, ShortCircuitReplica> replicas,
|
||||||
|
Map<ExtendedBlockId, InvalidToken> failedLoads,
|
||||||
|
Map<Long, ShortCircuitReplica> evictable,
|
||||||
|
Map<Long, ShortCircuitReplica> evictableMmapped) {
|
||||||
|
ShortCircuitReplica replica = replicas.get(
|
||||||
|
new ExtendedBlockId(firstBlock.getBlockId(), firstBlock.getBlockPoolId()));
|
||||||
|
Assert.assertNotNull(replica);
|
||||||
|
Assert.assertTrue(replica.hasMmap());
|
||||||
|
// The replica should not yet be evictable, since we have it open.
|
||||||
|
Assert.assertNull(replica.getEvictableTimeNs());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Read more blocks.
|
||||||
|
results[2] = fsIn.read(null, 4096,
|
||||||
|
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
||||||
|
results[3] = fsIn.read(null, 4096,
|
||||||
|
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
|
||||||
|
|
||||||
|
// we should have 3 mmaps, 1 evictable
|
||||||
|
cache.accept(new CountingVisitor(3, 5, 2, 0));
|
||||||
|
|
||||||
|
// After we close the cursors, the mmaps should be evictable for
|
||||||
|
// a brief period of time. Then, they should be closed (we're
|
||||||
|
// using a very quick timeout)
|
||||||
|
for (ByteBuffer buffer : results) {
|
||||||
|
if (buffer != null) {
|
||||||
|
fsIn.releaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fsIn.close();
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
public Boolean get() {
|
||||||
|
final MutableBoolean finished = new MutableBoolean(false);
|
||||||
|
cache.accept(new CacheVisitor() {
|
||||||
|
@Override
|
||||||
|
public void visit(int numOutstandingMmaps,
|
||||||
|
Map<ExtendedBlockId, ShortCircuitReplica> replicas,
|
||||||
|
Map<ExtendedBlockId, InvalidToken> failedLoads,
|
||||||
|
Map<Long, ShortCircuitReplica> evictable,
|
||||||
|
Map<Long, ShortCircuitReplica> evictableMmapped) {
|
||||||
|
finished.setValue(evictableMmapped.isEmpty());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return finished.booleanValue();
|
||||||
|
}
|
||||||
|
}, 10, 60000);
|
||||||
|
|
||||||
|
cache.accept(new CountingVisitor(0, -1, -1, -1));
|
||||||
|
|
||||||
|
fs.close();
|
||||||
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
@ -1175,4 +1176,32 @@ public void testReservedHdfsPathsOnFS() throws Exception {
|
||||||
public void testReservedHdfsPathsOnFC() throws Exception {
|
public void testReservedHdfsPathsOnFC() throws Exception {
|
||||||
testOnFileContext(new TestReservedHdfsPaths());
|
testOnFileContext(new TestReservedHdfsPaths());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test trying to glob the root. Regression test for HDFS-5888.
|
||||||
|
**/
|
||||||
|
private static class TestGlobRoot implements FSTestWrapperGlobTest {
|
||||||
|
public void run(FSTestWrapper wrap, FSTestWrapper unprivilegedWrap,
|
||||||
|
FileSystem fs, FileContext fc) throws Exception {
|
||||||
|
final Path rootPath = new Path("/");
|
||||||
|
FileStatus oldRootStatus = wrap.getFileStatus(rootPath);
|
||||||
|
String newOwner = UUID.randomUUID().toString();
|
||||||
|
wrap.setOwner(new Path("/"), newOwner, null);
|
||||||
|
FileStatus[] status =
|
||||||
|
wrap.globStatus(rootPath, new AcceptAllPathFilter());
|
||||||
|
Assert.assertEquals(1, status.length);
|
||||||
|
Assert.assertEquals(newOwner, status[0].getOwner());
|
||||||
|
wrap.setOwner(new Path("/"), oldRootStatus.getOwner(), null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGlobRootOnFS() throws Exception {
|
||||||
|
testOnFileSystem(new TestGlobRoot());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGlobRootOnFC() throws Exception {
|
||||||
|
testOnFileContext(new TestGlobRoot());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,8 +28,12 @@
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -38,6 +42,8 @@
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A helper class to setup the cluster, and get to BlockReader and DataNode for a block.
|
* A helper class to setup the cluster, and get to BlockReader and DataNode for a block.
|
||||||
|
@ -141,22 +147,54 @@ public void readAndCheckEOS(BlockReader reader, int length, boolean expectEof)
|
||||||
*/
|
*/
|
||||||
public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
|
public BlockReader getBlockReader(LocatedBlock testBlock, int offset, int lenToRead)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
return getBlockReader(cluster, testBlock, offset, lenToRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a BlockReader for the given block.
|
||||||
|
*/
|
||||||
|
public static BlockReader getBlockReader(MiniDFSCluster cluster,
|
||||||
|
LocatedBlock testBlock, int offset, int lenToRead) throws IOException {
|
||||||
InetSocketAddress targetAddr = null;
|
InetSocketAddress targetAddr = null;
|
||||||
Socket sock = null;
|
|
||||||
ExtendedBlock block = testBlock.getBlock();
|
ExtendedBlock block = testBlock.getBlock();
|
||||||
DatanodeInfo[] nodes = testBlock.getLocations();
|
DatanodeInfo[] nodes = testBlock.getLocations();
|
||||||
targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());
|
targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());
|
||||||
sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
|
||||||
sock.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
|
|
||||||
return BlockReaderFactory.newBlockReader(
|
final DistributedFileSystem fs = cluster.getFileSystem();
|
||||||
new DFSClient.Conf(conf),
|
return new BlockReaderFactory(fs.getClient().getConf()).
|
||||||
targetAddr.toString()+ ":" + block.getBlockId(), block,
|
setInetSocketAddress(targetAddr).
|
||||||
testBlock.getBlockToken(),
|
setBlock(block).
|
||||||
offset, lenToRead,
|
setFileName(targetAddr.toString()+ ":" + block.getBlockId()).
|
||||||
true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock),
|
setBlockToken(testBlock.getBlockToken()).
|
||||||
nodes[0], null, null, null, false, CachingStrategy.newDefaultStrategy());
|
setStartOffset(offset).
|
||||||
|
setLength(lenToRead).
|
||||||
|
setVerifyChecksum(true).
|
||||||
|
setClientName("BlockReaderTestUtil").
|
||||||
|
setDatanodeInfo(nodes[0]).
|
||||||
|
setClientCacheContext(ClientContext.getFromConf(fs.getConf())).
|
||||||
|
setCachingStrategy(CachingStrategy.newDefaultStrategy()).
|
||||||
|
setConfiguration(fs.getConf()).
|
||||||
|
setAllowShortCircuitLocalReads(true).
|
||||||
|
setRemotePeerFactory(new RemotePeerFactory() {
|
||||||
|
@Override
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr)
|
||||||
|
throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
Socket sock = NetUtils.
|
||||||
|
getDefaultSocketFactory(fs.getConf()).createSocket();
|
||||||
|
try {
|
||||||
|
sock.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
peer = TcpPeerServer.peerFromSocket(sock);
|
||||||
|
} finally {
|
||||||
|
if (peer == null) {
|
||||||
|
IOUtils.closeQuietly(sock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}).
|
||||||
|
build();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -167,4 +205,13 @@ public DataNode getDataNode(LocatedBlock testBlock) {
|
||||||
int ipcport = nodes[0].getIpcPort();
|
int ipcport = nodes[0].getIpcPort();
|
||||||
return cluster.getDataNode(ipcport);
|
return cluster.getDataNode(ipcport);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void enableBlockReaderFactoryTracing() {
|
||||||
|
LogManager.getLogger(BlockReaderFactory.class.getName()).setLevel(
|
||||||
|
Level.TRACE);
|
||||||
|
LogManager.getLogger(ShortCircuitCache.class.getName()).setLevel(
|
||||||
|
Level.TRACE);
|
||||||
|
LogManager.getLogger(ShortCircuitReplica.class.getName()).setLevel(
|
||||||
|
Level.TRACE);
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -186,10 +186,26 @@ public void createFiles(FileSystem fs, String topdir,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String readFile(FileSystem fs, Path fileName) throws IOException {
|
public static String readFile(FileSystem fs, Path fileName)
|
||||||
|
throws IOException {
|
||||||
|
byte buf[] = readFileBuffer(fs, fileName);
|
||||||
|
return new String(buf, 0, buf.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] readFileBuffer(FileSystem fs, Path fileName)
|
||||||
|
throws IOException {
|
||||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||||
IOUtils.copyBytes(fs.open(fileName), os, 1024, true);
|
try {
|
||||||
return os.toString();
|
FSDataInputStream in = fs.open(fileName);
|
||||||
|
try {
|
||||||
|
IOUtils.copyBytes(fs.open(fileName), os, 1024, true);
|
||||||
|
return os.toByteArray();
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
os.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createFile(FileSystem fs, Path fileName, long fileLen,
|
public static void createFile(FileSystem fs, Path fileName, long fileLen,
|
||||||
|
@ -231,6 +247,13 @@ public static void createFile(FileSystem fs, Path fileName, int bufferLen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static byte[] calculateFileContentsFromSeed(long seed, int length) {
|
||||||
|
Random rb = new Random(seed);
|
||||||
|
byte val[] = new byte[length];
|
||||||
|
rb.nextBytes(val);
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
/** check if the files have been copied correctly. */
|
/** check if the files have been copied correctly. */
|
||||||
public boolean checkFiles(FileSystem fs, String topdir) throws IOException {
|
public boolean checkFiles(FileSystem fs, String topdir) throws IOException {
|
||||||
Path root = new Path(topdir);
|
Path root = new Path(topdir);
|
||||||
|
@ -550,8 +573,12 @@ public void cleanup(FileSystem fs, String topdir) throws IOException {
|
||||||
|
|
||||||
public static ExtendedBlock getFirstBlock(FileSystem fs, Path path) throws IOException {
|
public static ExtendedBlock getFirstBlock(FileSystem fs, Path path) throws IOException {
|
||||||
HdfsDataInputStream in = (HdfsDataInputStream) fs.open(path);
|
HdfsDataInputStream in = (HdfsDataInputStream) fs.open(path);
|
||||||
in.readByte();
|
try {
|
||||||
return in.getCurrentBlock();
|
in.readByte();
|
||||||
|
return in.getCurrentBlock();
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<LocatedBlock> getAllBlocks(FSDataInputStream in)
|
public static List<LocatedBlock> getAllBlocks(FSDataInputStream in)
|
||||||
|
|
|
@ -0,0 +1,285 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplicaInfo;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.util.concurrent.Uninterruptibles;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC;
|
||||||
|
|
||||||
|
public class TestBlockReaderFactory {
|
||||||
|
static final Log LOG = LogFactory.getLog(TestBlockReaderFactory.class);
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void init() {
|
||||||
|
DomainSocket.disableBindPathValidation();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void cleanup() {
|
||||||
|
DFSInputStream.tcpReadsDisabledForTesting = false;
|
||||||
|
BlockReaderFactory.createShortCircuitReplicaInfoCallback = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Configuration createShortCircuitConf(String testName,
|
||||||
|
TemporarySocketDirectory sockDir) {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set(DFS_CLIENT_CONTEXT, testName);
|
||||||
|
conf.setLong(DFS_BLOCK_SIZE_KEY, 4096);
|
||||||
|
conf.set(DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(),
|
||||||
|
testName + "._PORT").getAbsolutePath());
|
||||||
|
conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
|
conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
||||||
|
false);
|
||||||
|
conf.setBoolean(DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If we have a UNIX domain socket configured,
|
||||||
|
* and we have dfs.client.domain.socket.data.traffic set to true,
|
||||||
|
* and short-circuit access fails, we should still be able to pass
|
||||||
|
* data traffic over the UNIX domain socket. Test this.
|
||||||
|
*/
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testFallbackFromShortCircuitToUnixDomainTraffic()
|
||||||
|
throws Exception {
|
||||||
|
DFSInputStream.tcpReadsDisabledForTesting = true;
|
||||||
|
TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
|
||||||
|
|
||||||
|
// The server is NOT configured with short-circuit local reads;
|
||||||
|
// the client is. Both support UNIX domain reads.
|
||||||
|
Configuration clientConf = createShortCircuitConf(
|
||||||
|
"testFallbackFromShortCircuitToUnixDomainTraffic", sockDir);
|
||||||
|
clientConf.setBoolean(DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true);
|
||||||
|
Configuration serverConf = new Configuration(clientConf);
|
||||||
|
serverConf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);
|
||||||
|
|
||||||
|
MiniDFSCluster cluster =
|
||||||
|
new MiniDFSCluster.Builder(serverConf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
FileSystem dfs = FileSystem.get(cluster.getURI(0), clientConf);
|
||||||
|
String TEST_FILE = "/test_file";
|
||||||
|
final int TEST_FILE_LEN = 8193;
|
||||||
|
final int SEED = 0xFADED;
|
||||||
|
DFSTestUtil.createFile(dfs, new Path(TEST_FILE), TEST_FILE_LEN,
|
||||||
|
(short)1, SEED);
|
||||||
|
byte contents[] = DFSTestUtil.readFileBuffer(dfs, new Path(TEST_FILE));
|
||||||
|
byte expected[] = DFSTestUtil.
|
||||||
|
calculateFileContentsFromSeed(SEED, TEST_FILE_LEN);
|
||||||
|
Assert.assertTrue(Arrays.equals(contents, expected));
|
||||||
|
cluster.shutdown();
|
||||||
|
sockDir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the case where we have multiple threads waiting on the
|
||||||
|
* ShortCircuitCache delivering a certain ShortCircuitReplica.
|
||||||
|
*
|
||||||
|
* In this case, there should only be one call to
|
||||||
|
* createShortCircuitReplicaInfo. This one replica should be shared
|
||||||
|
* by all threads.
|
||||||
|
*/
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testMultipleWaitersOnShortCircuitCache()
|
||||||
|
throws Exception {
|
||||||
|
final CountDownLatch latch = new CountDownLatch(1);
|
||||||
|
final AtomicBoolean creationIsBlocked = new AtomicBoolean(true);
|
||||||
|
final AtomicBoolean testFailed = new AtomicBoolean(false);
|
||||||
|
DFSInputStream.tcpReadsDisabledForTesting = true;
|
||||||
|
BlockReaderFactory.createShortCircuitReplicaInfoCallback =
|
||||||
|
new ShortCircuitCache.ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
Uninterruptibles.awaitUninterruptibly(latch);
|
||||||
|
if (!creationIsBlocked.compareAndSet(true, false)) {
|
||||||
|
Assert.fail("there were multiple calls to "
|
||||||
|
+ "createShortCircuitReplicaInfo. Only one was expected.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
|
||||||
|
Configuration conf = createShortCircuitConf(
|
||||||
|
"testMultipleWaitersOnShortCircuitCache", sockDir);
|
||||||
|
MiniDFSCluster cluster =
|
||||||
|
new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
final DistributedFileSystem dfs = cluster.getFileSystem();
|
||||||
|
final String TEST_FILE = "/test_file";
|
||||||
|
final int TEST_FILE_LEN = 4000;
|
||||||
|
final int SEED = 0xFADED;
|
||||||
|
final int NUM_THREADS = 10;
|
||||||
|
DFSTestUtil.createFile(dfs, new Path(TEST_FILE), TEST_FILE_LEN,
|
||||||
|
(short)1, SEED);
|
||||||
|
Runnable readerRunnable = new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
byte contents[] = DFSTestUtil.readFileBuffer(dfs, new Path(TEST_FILE));
|
||||||
|
Assert.assertFalse(creationIsBlocked.get());
|
||||||
|
byte expected[] = DFSTestUtil.
|
||||||
|
calculateFileContentsFromSeed(SEED, TEST_FILE_LEN);
|
||||||
|
Assert.assertTrue(Arrays.equals(contents, expected));
|
||||||
|
} catch (Throwable e) {
|
||||||
|
LOG.error("readerRunnable error", e);
|
||||||
|
testFailed.set(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Thread threads[] = new Thread[NUM_THREADS];
|
||||||
|
for (int i = 0; i < NUM_THREADS; i++) {
|
||||||
|
threads[i] = new Thread(readerRunnable);
|
||||||
|
threads[i].start();
|
||||||
|
}
|
||||||
|
Thread.sleep(500);
|
||||||
|
latch.countDown();
|
||||||
|
for (int i = 0; i < NUM_THREADS; i++) {
|
||||||
|
Uninterruptibles.joinUninterruptibly(threads[i]);
|
||||||
|
}
|
||||||
|
cluster.shutdown();
|
||||||
|
sockDir.close();
|
||||||
|
Assert.assertFalse(testFailed.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the case where we have a failure to complete a short circuit read
|
||||||
|
* that occurs, and then later on, we have a success.
|
||||||
|
* Any thread waiting on a cache load should receive the failure (if it
|
||||||
|
* occurs); however, the failure result should not be cached. We want
|
||||||
|
* to be able to retry later and succeed.
|
||||||
|
*/
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testShortCircuitCacheTemporaryFailure()
|
||||||
|
throws Exception {
|
||||||
|
BlockReaderTestUtil.enableBlockReaderFactoryTracing();
|
||||||
|
final AtomicBoolean replicaCreationShouldFail = new AtomicBoolean(true);
|
||||||
|
final AtomicBoolean testFailed = new AtomicBoolean(false);
|
||||||
|
DFSInputStream.tcpReadsDisabledForTesting = true;
|
||||||
|
BlockReaderFactory.createShortCircuitReplicaInfoCallback =
|
||||||
|
new ShortCircuitCache.ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
if (replicaCreationShouldFail.get()) {
|
||||||
|
// Insert a short delay to increase the chance that one client
|
||||||
|
// thread waits for the other client thread's failure via
|
||||||
|
// a condition variable.
|
||||||
|
Uninterruptibles.sleepUninterruptibly(2, TimeUnit.SECONDS);
|
||||||
|
return new ShortCircuitReplicaInfo();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
TemporarySocketDirectory sockDir = new TemporarySocketDirectory();
|
||||||
|
Configuration conf = createShortCircuitConf(
|
||||||
|
"testShortCircuitCacheTemporaryFailure", sockDir);
|
||||||
|
final MiniDFSCluster cluster =
|
||||||
|
new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
final DistributedFileSystem dfs = cluster.getFileSystem();
|
||||||
|
final String TEST_FILE = "/test_file";
|
||||||
|
final int TEST_FILE_LEN = 4000;
|
||||||
|
final int NUM_THREADS = 2;
|
||||||
|
final int SEED = 0xFADED;
|
||||||
|
final CountDownLatch gotFailureLatch = new CountDownLatch(NUM_THREADS);
|
||||||
|
final CountDownLatch shouldRetryLatch = new CountDownLatch(1);
|
||||||
|
DFSTestUtil.createFile(dfs, new Path(TEST_FILE), TEST_FILE_LEN,
|
||||||
|
(short)1, SEED);
|
||||||
|
Runnable readerRunnable = new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
// First time should fail.
|
||||||
|
List<LocatedBlock> locatedBlocks =
|
||||||
|
cluster.getNameNode().getRpcServer().getBlockLocations(
|
||||||
|
TEST_FILE, 0, TEST_FILE_LEN).getLocatedBlocks();
|
||||||
|
LocatedBlock lblock = locatedBlocks.get(0); // first block
|
||||||
|
BlockReader blockReader = null;
|
||||||
|
try {
|
||||||
|
blockReader = BlockReaderTestUtil.
|
||||||
|
getBlockReader(cluster, lblock, 0, TEST_FILE_LEN);
|
||||||
|
Assert.fail("expected getBlockReader to fail the first time.");
|
||||||
|
} catch (Throwable t) {
|
||||||
|
Assert.assertTrue("expected to see 'TCP reads were disabled " +
|
||||||
|
"for testing' in exception " + t, t.getMessage().contains(
|
||||||
|
"TCP reads were disabled for testing"));
|
||||||
|
} finally {
|
||||||
|
if (blockReader != null) blockReader.close(); // keep findbugs happy
|
||||||
|
}
|
||||||
|
gotFailureLatch.countDown();
|
||||||
|
shouldRetryLatch.await();
|
||||||
|
|
||||||
|
// Second time should succeed.
|
||||||
|
try {
|
||||||
|
blockReader = BlockReaderTestUtil.
|
||||||
|
getBlockReader(cluster, lblock, 0, TEST_FILE_LEN);
|
||||||
|
} catch (Throwable t) {
|
||||||
|
LOG.error("error trying to retrieve a block reader " +
|
||||||
|
"the second time.", t);
|
||||||
|
throw t;
|
||||||
|
} finally {
|
||||||
|
if (blockReader != null) blockReader.close();
|
||||||
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
LOG.error("getBlockReader failure", t);
|
||||||
|
testFailed.set(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Thread threads[] = new Thread[NUM_THREADS];
|
||||||
|
for (int i = 0; i < NUM_THREADS; i++) {
|
||||||
|
threads[i] = new Thread(readerRunnable);
|
||||||
|
threads[i].start();
|
||||||
|
}
|
||||||
|
gotFailureLatch.await();
|
||||||
|
replicaCreationShouldFail.set(false);
|
||||||
|
shouldRetryLatch.countDown();
|
||||||
|
for (int i = 0; i < NUM_THREADS; i++) {
|
||||||
|
Uninterruptibles.joinUninterruptibly(threads[i]);
|
||||||
|
}
|
||||||
|
cluster.shutdown();
|
||||||
|
sockDir.close();
|
||||||
|
Assert.assertFalse(testFailed.get());
|
||||||
|
}
|
||||||
|
}
|
|
@ -30,13 +30,15 @@
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.net.unix.DomainSocket;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
|
@ -155,6 +157,8 @@ public void runBlockReaderLocalTest(BlockReaderLocalTest test,
|
||||||
File metaFile = MiniDFSCluster.getBlockMetadataFile(0, block);
|
File metaFile = MiniDFSCluster.getBlockMetadataFile(0, block);
|
||||||
|
|
||||||
DatanodeID datanodeID = cluster.getDataNodes().get(0).getDatanodeId();
|
DatanodeID datanodeID = cluster.getDataNodes().get(0).getDatanodeId();
|
||||||
|
ShortCircuitCache shortCircuitCache =
|
||||||
|
ClientContext.getFromConf(conf).getShortCircuitCache();
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
cluster = null;
|
cluster = null;
|
||||||
test.setup(dataFile, checksum);
|
test.setup(dataFile, checksum);
|
||||||
|
@ -164,16 +168,17 @@ public void runBlockReaderLocalTest(BlockReaderLocalTest test,
|
||||||
};
|
};
|
||||||
dataIn = streams[0];
|
dataIn = streams[0];
|
||||||
metaIn = streams[1];
|
metaIn = streams[1];
|
||||||
|
ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
|
||||||
|
ShortCircuitReplica replica = new ShortCircuitReplica(
|
||||||
|
key, dataIn, metaIn, shortCircuitCache, Time.now());
|
||||||
blockReaderLocal = new BlockReaderLocal.Builder(
|
blockReaderLocal = new BlockReaderLocal.Builder(
|
||||||
new DFSClient.Conf(conf)).
|
new DFSClient.Conf(conf)).
|
||||||
setFilename(TEST_PATH.getName()).
|
setFilename(TEST_PATH.getName()).
|
||||||
setBlock(block).
|
setBlock(block).
|
||||||
setStreams(streams).
|
setShortCircuitReplica(replica).
|
||||||
setDatanodeID(datanodeID).
|
setDatanodeID(datanodeID).
|
||||||
setCachingStrategy(new CachingStrategy(false, readahead)).
|
setCachingStrategy(new CachingStrategy(false, readahead)).
|
||||||
setVerifyChecksum(checksum).
|
setVerifyChecksum(checksum).
|
||||||
setBlockMetadataHeader(BlockMetadataHeader.preadHeader(
|
|
||||||
metaIn.getChannel())).
|
|
||||||
build();
|
build();
|
||||||
dataIn = null;
|
dataIn = null;
|
||||||
metaIn = null;
|
metaIn = null;
|
||||||
|
|
|
@ -25,18 +25,8 @@
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
|
||||||
import org.apache.hadoop.security.token.Token;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.Matchers;
|
|
||||||
import org.mockito.Mockito;
|
|
||||||
import org.mockito.invocation.InvocationOnMock;
|
|
||||||
import org.mockito.stubbing.Answer;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class tests the client connection caching in a single node
|
* This class tests the client connection caching in a single node
|
||||||
|
@ -48,30 +38,6 @@ public class TestConnCache {
|
||||||
static final int BLOCK_SIZE = 4096;
|
static final int BLOCK_SIZE = 4096;
|
||||||
static final int FILE_SIZE = 3 * BLOCK_SIZE;
|
static final int FILE_SIZE = 3 * BLOCK_SIZE;
|
||||||
|
|
||||||
/**
|
|
||||||
* A mock Answer to remember the BlockReader used.
|
|
||||||
*
|
|
||||||
* It verifies that all invocation to DFSInputStream.getBlockReader()
|
|
||||||
* use the same peer.
|
|
||||||
*/
|
|
||||||
private class MockGetBlockReader implements Answer<RemoteBlockReader2> {
|
|
||||||
public RemoteBlockReader2 reader = null;
|
|
||||||
private Peer peer = null;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public RemoteBlockReader2 answer(InvocationOnMock invocation) throws Throwable {
|
|
||||||
RemoteBlockReader2 prevReader = reader;
|
|
||||||
reader = (RemoteBlockReader2) invocation.callRealMethod();
|
|
||||||
if (peer == null) {
|
|
||||||
peer = reader.getPeer();
|
|
||||||
} else if (prevReader != null) {
|
|
||||||
Assert.assertSame("DFSInputStream should use the same peer",
|
|
||||||
peer, reader.getPeer());
|
|
||||||
}
|
|
||||||
return reader;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* (Optionally) seek to position, read and verify data.
|
* (Optionally) seek to position, read and verify data.
|
||||||
*
|
*
|
||||||
|
@ -115,33 +81,29 @@ private void pread(DFSInputStream in,
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testReadFromOneDN() throws Exception {
|
public void testReadFromOneDN() throws Exception {
|
||||||
BlockReaderTestUtil util = new BlockReaderTestUtil(1,
|
HdfsConfiguration configuration = new HdfsConfiguration();
|
||||||
new HdfsConfiguration());
|
// One of the goals of this test is to verify that we don't open more
|
||||||
|
// than one socket. So use a different client context, so that we
|
||||||
|
// get our own socket cache, rather than sharing with the other test
|
||||||
|
// instances. Also use a really long socket timeout so that nothing
|
||||||
|
// gets closed before we get around to checking the cache size at the end.
|
||||||
|
final String contextName = "testReadFromOneDNContext";
|
||||||
|
configuration.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, contextName);
|
||||||
|
configuration.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
|
||||||
|
100000000L);
|
||||||
|
BlockReaderTestUtil util = new BlockReaderTestUtil(1, configuration);
|
||||||
final Path testFile = new Path("/testConnCache.dat");
|
final Path testFile = new Path("/testConnCache.dat");
|
||||||
byte authenticData[] = util.writeFile(testFile, FILE_SIZE / 1024);
|
byte authenticData[] = util.writeFile(testFile, FILE_SIZE / 1024);
|
||||||
DFSClient client = new DFSClient(
|
DFSClient client = new DFSClient(
|
||||||
new InetSocketAddress("localhost",
|
new InetSocketAddress("localhost",
|
||||||
util.getCluster().getNameNodePort()), util.getConf());
|
util.getCluster().getNameNodePort()), util.getConf());
|
||||||
DFSInputStream in = Mockito.spy(client.open(testFile.toString()));
|
ClientContext cacheContext =
|
||||||
|
ClientContext.get(contextName, client.getConf());
|
||||||
|
DFSInputStream in = client.open(testFile.toString());
|
||||||
LOG.info("opened " + testFile.toString());
|
LOG.info("opened " + testFile.toString());
|
||||||
byte[] dataBuf = new byte[BLOCK_SIZE];
|
byte[] dataBuf = new byte[BLOCK_SIZE];
|
||||||
|
|
||||||
MockGetBlockReader answer = new MockGetBlockReader();
|
|
||||||
Mockito.doAnswer(answer).when(in).getBlockReader(
|
|
||||||
(InetSocketAddress) Matchers.anyObject(),
|
|
||||||
(DatanodeInfo) Matchers.anyObject(),
|
|
||||||
Matchers.anyString(),
|
|
||||||
(ExtendedBlock) Matchers.anyObject(),
|
|
||||||
(Token<BlockTokenIdentifier>) Matchers.anyObject(),
|
|
||||||
Matchers.anyLong(),
|
|
||||||
Matchers.anyLong(),
|
|
||||||
Matchers.anyInt(),
|
|
||||||
Matchers.anyBoolean(),
|
|
||||||
Matchers.anyString(),
|
|
||||||
(CachingStrategy)Matchers.anyObject());
|
|
||||||
|
|
||||||
// Initial read
|
// Initial read
|
||||||
pread(in, 0, dataBuf, 0, dataBuf.length, authenticData);
|
pread(in, 0, dataBuf, 0, dataBuf.length, authenticData);
|
||||||
// Read again and verify that the socket is the same
|
// Read again and verify that the socket is the same
|
||||||
|
@ -153,5 +115,8 @@ public void testReadFromOneDN() throws Exception {
|
||||||
pread(in, 64, dataBuf, 0, dataBuf.length / 2, authenticData);
|
pread(in, 64, dataBuf, 0, dataBuf.length / 2, authenticData);
|
||||||
|
|
||||||
in.close();
|
in.close();
|
||||||
|
client.close();
|
||||||
|
Assert.assertEquals(1,
|
||||||
|
ClientContext.getFromConf(configuration).getPeerCache().size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
@ -86,21 +86,22 @@ public void testDatanodeRespectsKeepAliveTimeout() throws Exception {
|
||||||
// the datanode-side expiration time.
|
// the datanode-side expiration time.
|
||||||
final long CLIENT_EXPIRY_MS = 60000L;
|
final long CLIENT_EXPIRY_MS = 60000L;
|
||||||
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
||||||
PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
|
clientConf.set(DFS_CLIENT_CONTEXT, "testDatanodeRespectsKeepAliveTimeout");
|
||||||
DistributedFileSystem fs =
|
DistributedFileSystem fs =
|
||||||
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
||||||
clientConf);
|
clientConf);
|
||||||
|
PeerCache peerCache = ClientContext.getFromConf(clientConf).getPeerCache();
|
||||||
|
|
||||||
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
||||||
|
|
||||||
// Clients that write aren't currently re-used.
|
// Clients that write aren't currently re-used.
|
||||||
assertEquals(0, fs.dfs.peerCache.size());
|
assertEquals(0, peerCache.size());
|
||||||
assertXceiverCount(0);
|
assertXceiverCount(0);
|
||||||
|
|
||||||
// Reads the file, so we should get a
|
// Reads the file, so we should get a
|
||||||
// cached socket, and should have an xceiver on the other side.
|
// cached socket, and should have an xceiver on the other side.
|
||||||
DFSTestUtil.readFile(fs, TEST_FILE);
|
DFSTestUtil.readFile(fs, TEST_FILE);
|
||||||
assertEquals(1, fs.dfs.peerCache.size());
|
assertEquals(1, peerCache.size());
|
||||||
assertXceiverCount(1);
|
assertXceiverCount(1);
|
||||||
|
|
||||||
// Sleep for a bit longer than the keepalive timeout
|
// Sleep for a bit longer than the keepalive timeout
|
||||||
|
@ -111,15 +112,13 @@ public void testDatanodeRespectsKeepAliveTimeout() throws Exception {
|
||||||
// The socket is still in the cache, because we don't
|
// The socket is still in the cache, because we don't
|
||||||
// notice that it's closed until we try to read
|
// notice that it's closed until we try to read
|
||||||
// from it again.
|
// from it again.
|
||||||
assertEquals(1, fs.dfs.peerCache.size());
|
assertEquals(1, peerCache.size());
|
||||||
|
|
||||||
// Take it out of the cache - reading should
|
// Take it out of the cache - reading should
|
||||||
// give an EOF.
|
// give an EOF.
|
||||||
Peer peer = fs.dfs.peerCache.get(dn.getDatanodeId(), false);
|
Peer peer = peerCache.get(dn.getDatanodeId(), false);
|
||||||
assertNotNull(peer);
|
assertNotNull(peer);
|
||||||
assertEquals(-1, peer.getInputStream().read());
|
assertEquals(-1, peer.getInputStream().read());
|
||||||
PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT,
|
|
||||||
DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -132,34 +131,33 @@ public void testClientResponsesKeepAliveTimeout() throws Exception {
|
||||||
// the datanode-side expiration time.
|
// the datanode-side expiration time.
|
||||||
final long CLIENT_EXPIRY_MS = 10L;
|
final long CLIENT_EXPIRY_MS = 10L;
|
||||||
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
||||||
PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
|
clientConf.set(DFS_CLIENT_CONTEXT, "testClientResponsesKeepAliveTimeout");
|
||||||
DistributedFileSystem fs =
|
DistributedFileSystem fs =
|
||||||
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
||||||
clientConf);
|
clientConf);
|
||||||
|
PeerCache peerCache = ClientContext.getFromConf(clientConf).getPeerCache();
|
||||||
|
|
||||||
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
||||||
|
|
||||||
// Clients that write aren't currently re-used.
|
// Clients that write aren't currently re-used.
|
||||||
assertEquals(0, fs.dfs.peerCache.size());
|
assertEquals(0, peerCache.size());
|
||||||
assertXceiverCount(0);
|
assertXceiverCount(0);
|
||||||
|
|
||||||
// Reads the file, so we should get a
|
// Reads the file, so we should get a
|
||||||
// cached socket, and should have an xceiver on the other side.
|
// cached socket, and should have an xceiver on the other side.
|
||||||
DFSTestUtil.readFile(fs, TEST_FILE);
|
DFSTestUtil.readFile(fs, TEST_FILE);
|
||||||
assertEquals(1, fs.dfs.peerCache.size());
|
assertEquals(1, peerCache.size());
|
||||||
assertXceiverCount(1);
|
assertXceiverCount(1);
|
||||||
|
|
||||||
// Sleep for a bit longer than the client keepalive timeout.
|
// Sleep for a bit longer than the client keepalive timeout.
|
||||||
Thread.sleep(CLIENT_EXPIRY_MS + 1);
|
Thread.sleep(CLIENT_EXPIRY_MS + 1);
|
||||||
|
|
||||||
// Taking out a peer which is expired should give a null.
|
// Taking out a peer which is expired should give a null.
|
||||||
Peer peer = fs.dfs.peerCache.get(dn.getDatanodeId(), false);
|
Peer peer = peerCache.get(dn.getDatanodeId(), false);
|
||||||
assertTrue(peer == null);
|
assertTrue(peer == null);
|
||||||
|
|
||||||
// The socket cache is now empty.
|
// The socket cache is now empty.
|
||||||
assertEquals(0, fs.dfs.peerCache.size());
|
assertEquals(0, peerCache.size());
|
||||||
PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT,
|
|
||||||
DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -174,7 +172,7 @@ public void testSlowReader() throws Exception {
|
||||||
final long CLIENT_EXPIRY_MS = 600000L;
|
final long CLIENT_EXPIRY_MS = 600000L;
|
||||||
Configuration clientConf = new Configuration(conf);
|
Configuration clientConf = new Configuration(conf);
|
||||||
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
|
||||||
PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
|
clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
|
||||||
DistributedFileSystem fs =
|
DistributedFileSystem fs =
|
||||||
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
||||||
clientConf);
|
clientConf);
|
||||||
|
@ -209,7 +207,12 @@ public Boolean get() {
|
||||||
@Test(timeout=30000)
|
@Test(timeout=30000)
|
||||||
public void testManyClosedSocketsInCache() throws Exception {
|
public void testManyClosedSocketsInCache() throws Exception {
|
||||||
// Make a small file
|
// Make a small file
|
||||||
DistributedFileSystem fs = cluster.getFileSystem();
|
Configuration clientConf = new Configuration(conf);
|
||||||
|
clientConf.set(DFS_CLIENT_CONTEXT, "testManyClosedSocketsInCache");
|
||||||
|
DistributedFileSystem fs =
|
||||||
|
(DistributedFileSystem)FileSystem.get(cluster.getURI(),
|
||||||
|
clientConf);
|
||||||
|
PeerCache peerCache = ClientContext.getFromConf(clientConf).getPeerCache();
|
||||||
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
|
||||||
|
|
||||||
// Insert a bunch of dead sockets in the cache, by opening
|
// Insert a bunch of dead sockets in the cache, by opening
|
||||||
|
@ -227,15 +230,14 @@ public void testManyClosedSocketsInCache() throws Exception {
|
||||||
IOUtils.cleanup(null, stms);
|
IOUtils.cleanup(null, stms);
|
||||||
}
|
}
|
||||||
|
|
||||||
DFSClient client = ((DistributedFileSystem)fs).dfs;
|
assertEquals(5, peerCache.size());
|
||||||
assertEquals(5, client.peerCache.size());
|
|
||||||
|
|
||||||
// Let all the xceivers timeout
|
// Let all the xceivers timeout
|
||||||
Thread.sleep(1500);
|
Thread.sleep(1500);
|
||||||
assertXceiverCount(0);
|
assertXceiverCount(0);
|
||||||
|
|
||||||
// Client side still has the sockets cached
|
// Client side still has the sockets cached
|
||||||
assertEquals(5, client.peerCache.size());
|
assertEquals(5, peerCache.size());
|
||||||
|
|
||||||
// Reading should not throw an exception.
|
// Reading should not throw an exception.
|
||||||
DFSTestUtil.readFile(fs, TEST_FILE);
|
DFSTestUtil.readFile(fs, TEST_FILE);
|
||||||
|
|
|
@ -53,7 +53,8 @@ public void testDisableCache() throws Exception {
|
||||||
FileSystem fsWithoutCache = FileSystem.newInstance(util.getConf());
|
FileSystem fsWithoutCache = FileSystem.newInstance(util.getConf());
|
||||||
try {
|
try {
|
||||||
DFSTestUtil.readFile(fsWithoutCache, testFile);
|
DFSTestUtil.readFile(fsWithoutCache, testFile);
|
||||||
assertEquals(0, ((DistributedFileSystem)fsWithoutCache).dfs.peerCache.size());
|
assertEquals(0, ((DistributedFileSystem)fsWithoutCache).
|
||||||
|
dfs.getClientContext().getPeerCache().size());
|
||||||
} finally {
|
} finally {
|
||||||
fsWithoutCache.close();
|
fsWithoutCache.close();
|
||||||
util.shutdown();
|
util.shutdown();
|
||||||
|
|
|
@ -1,126 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
|
||||||
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public class TestFileInputStreamCache {
|
|
||||||
static final Log LOG = LogFactory.getLog(TestFileInputStreamCache.class);
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testCreateAndDestroy() throws Exception {
|
|
||||||
FileInputStreamCache cache = new FileInputStreamCache(10, 1000);
|
|
||||||
cache.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class TestFileDescriptorPair {
|
|
||||||
TemporarySocketDirectory dir = new TemporarySocketDirectory();
|
|
||||||
FileInputStream fis[];
|
|
||||||
|
|
||||||
public TestFileDescriptorPair() throws IOException {
|
|
||||||
fis = new FileInputStream[2];
|
|
||||||
for (int i = 0; i < 2; i++) {
|
|
||||||
String name = dir.getDir() + "/file" + i;
|
|
||||||
FileOutputStream fos = new FileOutputStream(name);
|
|
||||||
fos.write(1);
|
|
||||||
fos.close();
|
|
||||||
fis[i] = new FileInputStream(name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public FileInputStream[] getFileInputStreams() {
|
|
||||||
return fis;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
IOUtils.cleanup(LOG, fis);
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean compareWith(FileInputStream other[]) {
|
|
||||||
if ((other == null) || (fis == null)) {
|
|
||||||
return other == fis;
|
|
||||||
}
|
|
||||||
if (fis.length != other.length) return false;
|
|
||||||
for (int i = 0; i < fis.length; i++) {
|
|
||||||
if (fis[i] != other[i]) return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testAddAndRetrieve() throws Exception {
|
|
||||||
FileInputStreamCache cache = new FileInputStreamCache(1, 1000000);
|
|
||||||
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
|
||||||
"xyzzy", 8080, 9090, 7070, 6060);
|
|
||||||
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
|
||||||
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
|
||||||
cache.put(dnId, block, pair.getFileInputStreams());
|
|
||||||
FileInputStream fis[] = cache.get(dnId, block);
|
|
||||||
Assert.assertTrue(pair.compareWith(fis));
|
|
||||||
pair.close();
|
|
||||||
cache.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testExpiry() throws Exception {
|
|
||||||
FileInputStreamCache cache = new FileInputStreamCache(1, 10);
|
|
||||||
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
|
||||||
"xyzzy", 8080, 9090, 7070, 6060);
|
|
||||||
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
|
||||||
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
|
||||||
cache.put(dnId, block, pair.getFileInputStreams());
|
|
||||||
Thread.sleep(cache.getExpiryTimeMs() * 100);
|
|
||||||
FileInputStream fis[] = cache.get(dnId, block);
|
|
||||||
Assert.assertNull(fis);
|
|
||||||
pair.close();
|
|
||||||
cache.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEviction() throws Exception {
|
|
||||||
FileInputStreamCache cache = new FileInputStreamCache(1, 10000000);
|
|
||||||
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
|
||||||
"xyzzy", 8080, 9090, 7070, 6060);
|
|
||||||
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
|
||||||
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
|
||||||
cache.put(dnId, block, pair.getFileInputStreams());
|
|
||||||
DatanodeID dnId2 = new DatanodeID("127.0.0.1", "localhost",
|
|
||||||
"xyzzy", 8081, 9091, 7071, 6061);
|
|
||||||
TestFileDescriptorPair pair2 = new TestFileDescriptorPair();
|
|
||||||
cache.put(dnId2, block, pair2.getFileInputStreams());
|
|
||||||
FileInputStream fis[] = cache.get(dnId, block);
|
|
||||||
Assert.assertNull(fis);
|
|
||||||
FileInputStream fis2[] = cache.get(dnId2, block);
|
|
||||||
Assert.assertTrue(pair2.compareWith(fis2));
|
|
||||||
pair.close();
|
|
||||||
cache.close();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -303,5 +303,6 @@ public void testGetFileStatusOnDir() throws Exception {
|
||||||
FileSystem.LOG.info("GOOD: getting an exception", ioe);
|
FileSystem.LOG.info("GOOD: getting an exception", ioe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fs.delete(dir, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,346 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.mutable.MutableBoolean;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache.ShortCircuitReplicaCreator;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplicaInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.base.Supplier;
|
||||||
|
|
||||||
|
import java.io.DataOutputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class TestShortCircuitCache {
|
||||||
|
static final Log LOG = LogFactory.getLog(TestShortCircuitCache.class);
|
||||||
|
|
||||||
|
private static class TestFileDescriptorPair {
|
||||||
|
TemporarySocketDirectory dir = new TemporarySocketDirectory();
|
||||||
|
FileInputStream fis[];
|
||||||
|
|
||||||
|
public TestFileDescriptorPair() throws IOException {
|
||||||
|
fis = new FileInputStream[2];
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
String name = dir.getDir() + "/file" + i;
|
||||||
|
FileOutputStream fos = new FileOutputStream(name);
|
||||||
|
if (i == 0) {
|
||||||
|
// write 'data' file
|
||||||
|
fos.write(1);
|
||||||
|
} else {
|
||||||
|
// write 'metadata' file
|
||||||
|
BlockMetadataHeader header =
|
||||||
|
new BlockMetadataHeader((short)1,
|
||||||
|
DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4));
|
||||||
|
DataOutputStream dos = new DataOutputStream(fos);
|
||||||
|
BlockMetadataHeader.writeHeader(dos, header);
|
||||||
|
dos.close();
|
||||||
|
}
|
||||||
|
fos.close();
|
||||||
|
fis[i] = new FileInputStream(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileInputStream[] getFileInputStreams() {
|
||||||
|
return fis;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
IOUtils.cleanup(LOG, fis);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean compareWith(FileInputStream data, FileInputStream meta) {
|
||||||
|
return ((data == fis[0]) && (meta == fis[1]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SimpleReplicaCreator
|
||||||
|
implements ShortCircuitReplicaCreator {
|
||||||
|
private final int blockId;
|
||||||
|
private final ShortCircuitCache cache;
|
||||||
|
private final TestFileDescriptorPair pair;
|
||||||
|
|
||||||
|
SimpleReplicaCreator(int blockId, ShortCircuitCache cache,
|
||||||
|
TestFileDescriptorPair pair) {
|
||||||
|
this.blockId = blockId;
|
||||||
|
this.cache = cache;
|
||||||
|
this.pair = pair;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
try {
|
||||||
|
ExtendedBlockId key = new ExtendedBlockId(blockId, "test_bp1");
|
||||||
|
return new ShortCircuitReplicaInfo(
|
||||||
|
new ShortCircuitReplica(key,
|
||||||
|
pair.getFileInputStreams()[0], pair.getFileInputStreams()[1],
|
||||||
|
cache, Time.monotonicNow()));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testCreateAndDestroy() throws Exception {
|
||||||
|
ShortCircuitCache cache =
|
||||||
|
new ShortCircuitCache(10, 1, 10, 1, 1, 10000);
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testAddAndRetrieve() throws Exception {
|
||||||
|
final ShortCircuitCache cache =
|
||||||
|
new ShortCircuitCache(10, 10000000, 10, 10000000, 1, 10000);
|
||||||
|
final TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
||||||
|
ShortCircuitReplicaInfo replicaInfo1 =
|
||||||
|
cache.fetchOrCreate(new ExtendedBlockId(123, "test_bp1"),
|
||||||
|
new SimpleReplicaCreator(123, cache, pair));
|
||||||
|
Preconditions.checkNotNull(replicaInfo1.getReplica());
|
||||||
|
Preconditions.checkState(replicaInfo1.getInvalidTokenException() == null);
|
||||||
|
pair.compareWith(replicaInfo1.getReplica().getDataStream(),
|
||||||
|
replicaInfo1.getReplica().getMetaStream());
|
||||||
|
ShortCircuitReplicaInfo replicaInfo2 =
|
||||||
|
cache.fetchOrCreate(new ExtendedBlockId(123, "test_bp1"),
|
||||||
|
new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
Assert.fail("expected to use existing entry.");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Preconditions.checkNotNull(replicaInfo2.getReplica());
|
||||||
|
Preconditions.checkState(replicaInfo2.getInvalidTokenException() == null);
|
||||||
|
Preconditions.checkState(replicaInfo1 == replicaInfo2);
|
||||||
|
pair.compareWith(replicaInfo2.getReplica().getDataStream(),
|
||||||
|
replicaInfo2.getReplica().getMetaStream());
|
||||||
|
replicaInfo1.getReplica().unref();
|
||||||
|
replicaInfo2.getReplica().unref();
|
||||||
|
|
||||||
|
// Even after the reference count falls to 0, we still keep the replica
|
||||||
|
// around for a while (we have configured the expiry period to be really,
|
||||||
|
// really long here)
|
||||||
|
ShortCircuitReplicaInfo replicaInfo3 =
|
||||||
|
cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(123, "test_bp1"), new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
Assert.fail("expected to use existing entry.");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Preconditions.checkNotNull(replicaInfo3.getReplica());
|
||||||
|
Preconditions.checkState(replicaInfo3.getInvalidTokenException() == null);
|
||||||
|
replicaInfo3.getReplica().unref();
|
||||||
|
|
||||||
|
pair.close();
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testExpiry() throws Exception {
|
||||||
|
final ShortCircuitCache cache =
|
||||||
|
new ShortCircuitCache(2, 1, 1, 10000000, 1, 10000);
|
||||||
|
final TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
||||||
|
ShortCircuitReplicaInfo replicaInfo1 =
|
||||||
|
cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(123, "test_bp1"), new SimpleReplicaCreator(123, cache, pair));
|
||||||
|
Preconditions.checkNotNull(replicaInfo1.getReplica());
|
||||||
|
Preconditions.checkState(replicaInfo1.getInvalidTokenException() == null);
|
||||||
|
pair.compareWith(replicaInfo1.getReplica().getDataStream(),
|
||||||
|
replicaInfo1.getReplica().getMetaStream());
|
||||||
|
replicaInfo1.getReplica().unref();
|
||||||
|
final MutableBoolean triedToCreate = new MutableBoolean(false);
|
||||||
|
do {
|
||||||
|
Thread.sleep(10);
|
||||||
|
ShortCircuitReplicaInfo replicaInfo2 =
|
||||||
|
cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(123, "test_bp1"), new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
triedToCreate.setValue(true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if ((replicaInfo2 != null) && (replicaInfo2.getReplica() != null)) {
|
||||||
|
replicaInfo2.getReplica().unref();
|
||||||
|
}
|
||||||
|
} while (triedToCreate.isFalse());
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testEviction() throws Exception {
|
||||||
|
final ShortCircuitCache cache =
|
||||||
|
new ShortCircuitCache(2, 10000000, 1, 10000000, 1, 10000);
|
||||||
|
final TestFileDescriptorPair pairs[] = new TestFileDescriptorPair[] {
|
||||||
|
new TestFileDescriptorPair(),
|
||||||
|
new TestFileDescriptorPair(),
|
||||||
|
new TestFileDescriptorPair(),
|
||||||
|
};
|
||||||
|
ShortCircuitReplicaInfo replicaInfos[] = new ShortCircuitReplicaInfo[] {
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
};
|
||||||
|
for (int i = 0; i < pairs.length; i++) {
|
||||||
|
replicaInfos[i] = cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(i, "test_bp1"),
|
||||||
|
new SimpleReplicaCreator(i, cache, pairs[i]));
|
||||||
|
Preconditions.checkNotNull(replicaInfos[i].getReplica());
|
||||||
|
Preconditions.checkState(replicaInfos[i].getInvalidTokenException() == null);
|
||||||
|
pairs[i].compareWith(replicaInfos[i].getReplica().getDataStream(),
|
||||||
|
replicaInfos[i].getReplica().getMetaStream());
|
||||||
|
}
|
||||||
|
// At this point, we have 3 replicas in use.
|
||||||
|
// Let's close them all.
|
||||||
|
for (int i = 0; i < pairs.length; i++) {
|
||||||
|
replicaInfos[i].getReplica().unref();
|
||||||
|
}
|
||||||
|
// The last two replicas should still be cached.
|
||||||
|
for (int i = 1; i < pairs.length; i++) {
|
||||||
|
final Integer iVal = new Integer(i);
|
||||||
|
replicaInfos[i] = cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(i, "test_bp1"),
|
||||||
|
new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
Assert.fail("expected to use existing entry for " + iVal);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Preconditions.checkNotNull(replicaInfos[i].getReplica());
|
||||||
|
Preconditions.checkState(replicaInfos[i].getInvalidTokenException() == null);
|
||||||
|
pairs[i].compareWith(replicaInfos[i].getReplica().getDataStream(),
|
||||||
|
replicaInfos[i].getReplica().getMetaStream());
|
||||||
|
}
|
||||||
|
// The first (oldest) replica should not be cached.
|
||||||
|
final MutableBoolean calledCreate = new MutableBoolean(false);
|
||||||
|
replicaInfos[0] = cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(0, "test_bp1"),
|
||||||
|
new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
calledCreate.setValue(true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Preconditions.checkState(replicaInfos[0].getReplica() == null);
|
||||||
|
Assert.assertTrue(calledCreate.isTrue());
|
||||||
|
// Clean up
|
||||||
|
for (int i = 1; i < pairs.length; i++) {
|
||||||
|
replicaInfos[i].getReplica().unref();
|
||||||
|
}
|
||||||
|
for (int i = 0; i < pairs.length; i++) {
|
||||||
|
pairs[i].close();
|
||||||
|
}
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testStaleness() throws Exception {
|
||||||
|
// Set up the cache with a short staleness time.
|
||||||
|
final ShortCircuitCache cache =
|
||||||
|
new ShortCircuitCache(2, 10000000, 1, 10000000, 1, 10);
|
||||||
|
final TestFileDescriptorPair pairs[] = new TestFileDescriptorPair[] {
|
||||||
|
new TestFileDescriptorPair(),
|
||||||
|
new TestFileDescriptorPair(),
|
||||||
|
};
|
||||||
|
ShortCircuitReplicaInfo replicaInfos[] = new ShortCircuitReplicaInfo[] {
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
};
|
||||||
|
final long HOUR_IN_MS = 60 * 60 * 1000;
|
||||||
|
for (int i = 0; i < pairs.length; i++) {
|
||||||
|
final Integer iVal = new Integer(i);
|
||||||
|
final ExtendedBlockId key = new ExtendedBlockId(i, "test_bp1");
|
||||||
|
replicaInfos[i] = cache.fetchOrCreate(key,
|
||||||
|
new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
try {
|
||||||
|
return new ShortCircuitReplicaInfo(
|
||||||
|
new ShortCircuitReplica(key,
|
||||||
|
pairs[iVal].getFileInputStreams()[0],
|
||||||
|
pairs[iVal].getFileInputStreams()[1],
|
||||||
|
cache, Time.monotonicNow() + (iVal * HOUR_IN_MS)));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Preconditions.checkNotNull(replicaInfos[i].getReplica());
|
||||||
|
Preconditions.checkState(replicaInfos[i].getInvalidTokenException() == null);
|
||||||
|
pairs[i].compareWith(replicaInfos[i].getReplica().getDataStream(),
|
||||||
|
replicaInfos[i].getReplica().getMetaStream());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep trying to getOrCreate block 0 until it goes stale (and we must re-create.)
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
ShortCircuitReplicaInfo info = cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(0, "test_bp1"), new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (info.getReplica() != null) {
|
||||||
|
info.getReplica().unref();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}, 500, 60000);
|
||||||
|
|
||||||
|
// Make sure that second replica did not go stale.
|
||||||
|
ShortCircuitReplicaInfo info = cache.fetchOrCreate(
|
||||||
|
new ExtendedBlockId(1, "test_bp1"), new ShortCircuitReplicaCreator() {
|
||||||
|
@Override
|
||||||
|
public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
|
||||||
|
Assert.fail("second replica went stale, despite 1 " +
|
||||||
|
"hour staleness time.");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
info.getReplica().unref();
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
for (int i = 1; i < pairs.length; i++) {
|
||||||
|
replicaInfos[i].getReplica().unref();
|
||||||
|
}
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -27,6 +27,7 @@
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
@ -35,8 +36,9 @@
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
|
@ -125,8 +127,9 @@ static void checkFileContent(URI uri, Path name, byte[] expected,
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
// Ensure short circuit is enabled
|
// Ensure short circuit is enabled
|
||||||
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
|
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
|
||||||
|
ClientContext getClientContext = ClientContext.getFromConf(conf);
|
||||||
if (legacyShortCircuitFails) {
|
if (legacyShortCircuitFails) {
|
||||||
assertTrue(fs.getClient().useLegacyBlockReaderLocal());
|
assertFalse(getClientContext.getDisableLegacyBlockReaderLocal());
|
||||||
}
|
}
|
||||||
|
|
||||||
FSDataInputStream stm = fs.open(name);
|
FSDataInputStream stm = fs.open(name);
|
||||||
|
@ -155,7 +158,7 @@ static void checkFileContent(URI uri, Path name, byte[] expected,
|
||||||
checkData(actual, readOffset, expected, "Read 3");
|
checkData(actual, readOffset, expected, "Read 3");
|
||||||
|
|
||||||
if (legacyShortCircuitFails) {
|
if (legacyShortCircuitFails) {
|
||||||
assertFalse(fs.getClient().useLegacyBlockReaderLocal());
|
assertTrue(getClientContext.getDisableLegacyBlockReaderLocal());
|
||||||
}
|
}
|
||||||
stm.close();
|
stm.close();
|
||||||
}
|
}
|
||||||
|
@ -175,8 +178,9 @@ static void checkFileContentDirect(URI uri, Path name, byte[] expected,
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
// Ensure short circuit is enabled
|
// Ensure short circuit is enabled
|
||||||
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
|
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
|
||||||
|
ClientContext clientContext = ClientContext.getFromConf(conf);
|
||||||
if (legacyShortCircuitFails) {
|
if (legacyShortCircuitFails) {
|
||||||
assertTrue(fs.getClient().useLegacyBlockReaderLocal());
|
assertTrue(clientContext.getDisableLegacyBlockReaderLocal());
|
||||||
}
|
}
|
||||||
|
|
||||||
HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(name);
|
HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(name);
|
||||||
|
@ -209,7 +213,7 @@ static void checkFileContentDirect(URI uri, Path name, byte[] expected,
|
||||||
}
|
}
|
||||||
checkData(arrayFromByteBuffer(actual), readOffset, expected, "Read 3");
|
checkData(arrayFromByteBuffer(actual), readOffset, expected, "Read 3");
|
||||||
if (legacyShortCircuitFails) {
|
if (legacyShortCircuitFails) {
|
||||||
assertFalse(fs.getClient().useLegacyBlockReaderLocal());
|
assertTrue(clientContext.getDisableLegacyBlockReaderLocal());
|
||||||
}
|
}
|
||||||
stm.close();
|
stm.close();
|
||||||
}
|
}
|
||||||
|
@ -223,7 +227,6 @@ public void doTestShortCircuitReadLegacy(boolean ignoreChecksum, int size,
|
||||||
|
|
||||||
public void doTestShortCircuitRead(boolean ignoreChecksum, int size,
|
public void doTestShortCircuitRead(boolean ignoreChecksum, int size,
|
||||||
int readOffset) throws IOException, InterruptedException {
|
int readOffset) throws IOException, InterruptedException {
|
||||||
String shortCircuitUser = getCurrentUser();
|
|
||||||
doTestShortCircuitReadImpl(ignoreChecksum, size, readOffset,
|
doTestShortCircuitReadImpl(ignoreChecksum, size, readOffset,
|
||||||
null, getCurrentUser(), false);
|
null, getCurrentUser(), false);
|
||||||
}
|
}
|
||||||
|
@ -239,6 +242,10 @@ public void doTestShortCircuitReadImpl(boolean ignoreChecksum, int size,
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
||||||
ignoreChecksum);
|
ignoreChecksum);
|
||||||
|
// Set a random client context name so that we don't share a cache with
|
||||||
|
// other invocations of this function.
|
||||||
|
conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT,
|
||||||
|
UUID.randomUUID().toString());
|
||||||
conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
|
conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
|
||||||
new File(sockDir.getDir(),
|
new File(sockDir.getDir(),
|
||||||
"TestShortCircuitLocalRead._PORT.sock").getAbsolutePath());
|
"TestShortCircuitLocalRead._PORT.sock").getAbsolutePath());
|
||||||
|
@ -322,18 +329,6 @@ public void testLongFile() throws Exception {
|
||||||
doTestShortCircuitRead(true, 10*blockSize+100, 777);
|
doTestShortCircuitRead(true, 10*blockSize+100, 777);
|
||||||
}
|
}
|
||||||
|
|
||||||
private ClientDatanodeProtocol getProxy(UserGroupInformation ugi,
|
|
||||||
final DatanodeID dnInfo, final Configuration conf) throws IOException,
|
|
||||||
InterruptedException {
|
|
||||||
return ugi.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
|
|
||||||
@Override
|
|
||||||
public ClientDatanodeProtocol run() throws Exception {
|
|
||||||
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, 60000,
|
|
||||||
false);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private static DistributedFileSystem getFileSystem(String user, final URI uri,
|
private static DistributedFileSystem getFileSystem(String user, final URI uri,
|
||||||
final Configuration conf) throws InterruptedException, IOException {
|
final Configuration conf) throws InterruptedException, IOException {
|
||||||
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
|
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
|
||||||
|
@ -555,8 +550,7 @@ public void run() {
|
||||||
for (int i = 0; i < iteration; i++) {
|
for (int i = 0; i < iteration; i++) {
|
||||||
try {
|
try {
|
||||||
String user = getCurrentUser();
|
String user = getCurrentUser();
|
||||||
checkFileContent(fs.getUri(), file1, dataToWrite, 0, user, conf,
|
checkFileContent(fs.getUri(), file1, dataToWrite, 0, user, conf, true);
|
||||||
true);
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -608,7 +602,8 @@ public void doTestShortCircuitReadWithRemoteBlockReader(boolean ignoreChecksum,
|
||||||
stm.write(fileData);
|
stm.write(fileData);
|
||||||
stm.close();
|
stm.close();
|
||||||
try {
|
try {
|
||||||
checkFileContent(uri, file1, fileData, readOffset, shortCircuitUser, conf, shortCircuitFails);
|
checkFileContent(uri, file1, fileData, readOffset, shortCircuitUser,
|
||||||
|
conf, shortCircuitFails);
|
||||||
//RemoteBlockReader have unsupported method read(ByteBuffer bf)
|
//RemoteBlockReader have unsupported method read(ByteBuffer bf)
|
||||||
assertTrue("RemoteBlockReader unsupported method read(ByteBuffer bf) error",
|
assertTrue("RemoteBlockReader unsupported method read(ByteBuffer bf) error",
|
||||||
checkUnsupportedMethod(fs, file1, fileData, readOffset));
|
checkUnsupportedMethod(fs, file1, fileData, readOffset));
|
||||||
|
|
|
@ -38,10 +38,16 @@
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.BlockReader;
|
import org.apache.hadoop.hdfs.BlockReader;
|
||||||
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
||||||
|
import org.apache.hadoop.hdfs.ClientContext;
|
||||||
import org.apache.hadoop.hdfs.DFSClient;
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.DFSClient.Conf;
|
||||||
|
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitCache;
|
||||||
|
import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
@ -55,10 +61,13 @@
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestBlockTokenWithDFS {
|
public class TestBlockTokenWithDFS {
|
||||||
|
@ -131,50 +140,70 @@ private static FSDataOutputStream writeFile(FileSystem fileSys, Path name,
|
||||||
}
|
}
|
||||||
|
|
||||||
// try reading a block using a BlockReader directly
|
// try reading a block using a BlockReader directly
|
||||||
private static void tryRead(Configuration conf, LocatedBlock lblock,
|
private static void tryRead(final Configuration conf, LocatedBlock lblock,
|
||||||
boolean shouldSucceed) {
|
boolean shouldSucceed) {
|
||||||
InetSocketAddress targetAddr = null;
|
InetSocketAddress targetAddr = null;
|
||||||
Socket s = null;
|
IOException ioe = null;
|
||||||
BlockReader blockReader = null;
|
BlockReader blockReader = null;
|
||||||
ExtendedBlock block = lblock.getBlock();
|
ExtendedBlock block = lblock.getBlock();
|
||||||
try {
|
try {
|
||||||
DatanodeInfo[] nodes = lblock.getLocations();
|
DatanodeInfo[] nodes = lblock.getLocations();
|
||||||
targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());
|
targetAddr = NetUtils.createSocketAddr(nodes[0].getXferAddr());
|
||||||
s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
|
||||||
s.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
|
|
||||||
String file = BlockReaderFactory.getFileName(targetAddr,
|
|
||||||
"test-blockpoolid", block.getBlockId());
|
|
||||||
blockReader = BlockReaderFactory.newBlockReader(
|
|
||||||
new DFSClient.Conf(conf), file, block, lblock.getBlockToken(), 0, -1,
|
|
||||||
true, "TestBlockTokenWithDFS", TcpPeerServer.peerFromSocket(s),
|
|
||||||
nodes[0], null, null, null, false,
|
|
||||||
CachingStrategy.newDefaultStrategy());
|
|
||||||
|
|
||||||
|
blockReader = new BlockReaderFactory(new DFSClient.Conf(conf)).
|
||||||
|
setFileName(BlockReaderFactory.getFileName(targetAddr,
|
||||||
|
"test-blockpoolid", block.getBlockId())).
|
||||||
|
setBlock(block).
|
||||||
|
setBlockToken(lblock.getBlockToken()).
|
||||||
|
setInetSocketAddress(targetAddr).
|
||||||
|
setStartOffset(0).
|
||||||
|
setLength(-1).
|
||||||
|
setVerifyChecksum(true).
|
||||||
|
setClientName("TestBlockTokenWithDFS").
|
||||||
|
setDatanodeInfo(nodes[0]).
|
||||||
|
setCachingStrategy(CachingStrategy.newDefaultStrategy()).
|
||||||
|
setClientCacheContext(ClientContext.getFromConf(conf)).
|
||||||
|
setConfiguration(conf).
|
||||||
|
setRemotePeerFactory(new RemotePeerFactory() {
|
||||||
|
@Override
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr)
|
||||||
|
throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
||||||
|
try {
|
||||||
|
sock.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
peer = TcpPeerServer.peerFromSocket(sock);
|
||||||
|
} finally {
|
||||||
|
if (peer == null) {
|
||||||
|
IOUtils.closeSocket(sock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}).
|
||||||
|
build();
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
if (ex instanceof InvalidBlockTokenException) {
|
ioe = ex;
|
||||||
assertFalse("OP_READ_BLOCK: access token is invalid, "
|
|
||||||
+ "when it is expected to be valid", shouldSucceed);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
fail("OP_READ_BLOCK failed due to reasons other than access token: "
|
|
||||||
+ StringUtils.stringifyException(ex));
|
|
||||||
} finally {
|
} finally {
|
||||||
if (s != null) {
|
if (blockReader != null) {
|
||||||
try {
|
try {
|
||||||
s.close();
|
blockReader.close();
|
||||||
} catch (IOException iex) {
|
} catch (IOException e) {
|
||||||
} finally {
|
throw new RuntimeException(e);
|
||||||
s = null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (blockReader == null) {
|
if (shouldSucceed) {
|
||||||
fail("OP_READ_BLOCK failed due to reasons other than access token");
|
Assert.assertNotNull("OP_READ_BLOCK: access token is invalid, "
|
||||||
|
+ "when it is expected to be valid", blockReader);
|
||||||
|
} else {
|
||||||
|
Assert.assertNotNull("OP_READ_BLOCK: access token is valid, "
|
||||||
|
+ "when it is expected to be invalid", ioe);
|
||||||
|
Assert.assertTrue(
|
||||||
|
"OP_READ_BLOCK failed due to reasons other than access token: ",
|
||||||
|
ioe instanceof InvalidBlockTokenException);
|
||||||
}
|
}
|
||||||
assertTrue("OP_READ_BLOCK: access token is valid, "
|
|
||||||
+ "when it is expected to be invalid", shouldSucceed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// get a conf for testing
|
// get a conf for testing
|
||||||
|
@ -347,9 +376,13 @@ public void testRead() throws Exception {
|
||||||
/*
|
/*
|
||||||
* testing READ interface on DN using a BlockReader
|
* testing READ interface on DN using a BlockReader
|
||||||
*/
|
*/
|
||||||
|
DFSClient client = null;
|
||||||
new DFSClient(new InetSocketAddress("localhost",
|
try {
|
||||||
|
client = new DFSClient(new InetSocketAddress("localhost",
|
||||||
cluster.getNameNodePort()), conf);
|
cluster.getNameNodePort()), conf);
|
||||||
|
} finally {
|
||||||
|
if (client != null) client.close();
|
||||||
|
}
|
||||||
List<LocatedBlock> locatedBlocks = nnProto.getBlockLocations(
|
List<LocatedBlock> locatedBlocks = nnProto.getBlockLocations(
|
||||||
FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
|
FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
|
||||||
LocatedBlock lblock = locatedBlocks.get(0); // first block
|
LocatedBlock lblock = locatedBlocks.get(0); // first block
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
|
import org.apache.hadoop.hdfs.StorageType;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
|
||||||
|
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
|
import org.apache.hadoop.test.PathUtils;
|
||||||
|
import org.apache.hadoop.util.VersionInfo;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class TestReplicationPolicyConsiderLoad {
|
||||||
|
|
||||||
|
private static NameNode namenode;
|
||||||
|
private static DatanodeManager dnManager;
|
||||||
|
private static List<DatanodeRegistration> dnrList;
|
||||||
|
private static DatanodeDescriptor[] dataNodes;
|
||||||
|
private static DatanodeStorageInfo[] storages;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setupCluster() throws IOException {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
final String[] racks = {
|
||||||
|
"/rack1",
|
||||||
|
"/rack1",
|
||||||
|
"/rack1",
|
||||||
|
"/rack2",
|
||||||
|
"/rack2",
|
||||||
|
"/rack2"};
|
||||||
|
storages = DFSTestUtil.createDatanodeStorageInfos(racks);
|
||||||
|
dataNodes = DFSTestUtil.toDatanodeDescriptor(storages);
|
||||||
|
FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
|
||||||
|
conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
|
||||||
|
File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class);
|
||||||
|
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
|
||||||
|
new File(baseDir, "name").getPath());
|
||||||
|
conf.setBoolean(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true);
|
||||||
|
conf.setBoolean(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true);
|
||||||
|
conf.setBoolean(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, true);
|
||||||
|
DFSTestUtil.formatNameNode(conf);
|
||||||
|
namenode = new NameNode(conf);
|
||||||
|
int blockSize = 1024;
|
||||||
|
|
||||||
|
dnrList = new ArrayList<DatanodeRegistration>();
|
||||||
|
dnManager = namenode.getNamesystem().getBlockManager().getDatanodeManager();
|
||||||
|
|
||||||
|
// Register DNs
|
||||||
|
for (int i=0; i < 6; i++) {
|
||||||
|
DatanodeRegistration dnr = new DatanodeRegistration(dataNodes[i],
|
||||||
|
new StorageInfo(), new ExportedBlockKeys(), VersionInfo.getVersion());
|
||||||
|
dnrList.add(dnr);
|
||||||
|
dnManager.registerDatanode(dnr);
|
||||||
|
dataNodes[i].getStorageInfos()[0].setUtilizationForTesting(
|
||||||
|
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L,
|
||||||
|
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L);
|
||||||
|
dataNodes[i].updateHeartbeat(
|
||||||
|
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]),
|
||||||
|
0L, 0L, 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that chooseTarget with considerLoad set to true correctly calculates
|
||||||
|
* load with decommissioned nodes.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testChooseTargetWithDecomNodes() throws IOException {
|
||||||
|
namenode.getNamesystem().writeLock();
|
||||||
|
try {
|
||||||
|
// Decommission DNs so BlockPlacementPolicyDefault.isGoodTarget()
|
||||||
|
// returns false
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
DatanodeInfo d = dnManager.getDatanodeByXferAddr(
|
||||||
|
dnrList.get(i).getIpAddr(),
|
||||||
|
dnrList.get(i).getXferPort());
|
||||||
|
d.setDecommissioned();
|
||||||
|
}
|
||||||
|
String blockPoolId = namenode.getNamesystem().getBlockPoolId();
|
||||||
|
dnManager.handleHeartbeat(dnrList.get(3),
|
||||||
|
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]),
|
||||||
|
blockPoolId, dataNodes[3].getCacheCapacity(),
|
||||||
|
dataNodes[3].getCacheRemaining(),
|
||||||
|
2, 0, 0);
|
||||||
|
dnManager.handleHeartbeat(dnrList.get(4),
|
||||||
|
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]),
|
||||||
|
blockPoolId, dataNodes[4].getCacheCapacity(),
|
||||||
|
dataNodes[4].getCacheRemaining(),
|
||||||
|
4, 0, 0);
|
||||||
|
dnManager.handleHeartbeat(dnrList.get(5),
|
||||||
|
BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[5]),
|
||||||
|
blockPoolId, dataNodes[5].getCacheCapacity(),
|
||||||
|
dataNodes[5].getCacheRemaining(),
|
||||||
|
4, 0, 0);
|
||||||
|
|
||||||
|
// Call chooseTarget()
|
||||||
|
DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager()
|
||||||
|
.getBlockPlacementPolicy().chooseTarget("testFile.txt", 3,
|
||||||
|
dataNodes[0], new ArrayList<DatanodeStorageInfo>(), false, null,
|
||||||
|
1024, StorageType.DEFAULT);
|
||||||
|
|
||||||
|
assertEquals(3, targets.length);
|
||||||
|
Set<DatanodeStorageInfo> targetSet = new HashSet<DatanodeStorageInfo>(
|
||||||
|
Arrays.asList(targets));
|
||||||
|
for (int i = 3; i < storages.length; i++) {
|
||||||
|
assertTrue(targetSet.contains(storages[i]));
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
dataNodes[0].stopDecommission();
|
||||||
|
dataNodes[1].stopDecommission();
|
||||||
|
dataNodes[2].stopDecommission();
|
||||||
|
namenode.getNamesystem().writeUnlock();
|
||||||
|
}
|
||||||
|
NameNode.LOG.info("Done working on it");
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void teardownCluster() {
|
||||||
|
if (namenode != null) namenode.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -35,11 +35,14 @@
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.BlockReader;
|
import org.apache.hadoop.hdfs.BlockReader;
|
||||||
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
||||||
|
import org.apache.hadoop.hdfs.ClientContext;
|
||||||
import org.apache.hadoop.hdfs.DFSClient;
|
import org.apache.hadoop.hdfs.DFSClient;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||||
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
||||||
|
@ -48,13 +51,14 @@
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -284,23 +288,43 @@ private boolean deteteBlocks(File dir) {
|
||||||
private void accessBlock(DatanodeInfo datanode, LocatedBlock lblock)
|
private void accessBlock(DatanodeInfo datanode, LocatedBlock lblock)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
InetSocketAddress targetAddr = null;
|
InetSocketAddress targetAddr = null;
|
||||||
Socket s = null;
|
|
||||||
ExtendedBlock block = lblock.getBlock();
|
ExtendedBlock block = lblock.getBlock();
|
||||||
|
|
||||||
targetAddr = NetUtils.createSocketAddr(datanode.getXferAddr());
|
targetAddr = NetUtils.createSocketAddr(datanode.getXferAddr());
|
||||||
|
|
||||||
s = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
BlockReader blockReader = new BlockReaderFactory(new DFSClient.Conf(conf)).
|
||||||
s.connect(targetAddr, HdfsServerConstants.READ_TIMEOUT);
|
setInetSocketAddress(targetAddr).
|
||||||
s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
setBlock(block).
|
||||||
|
setFileName(BlockReaderFactory.getFileName(targetAddr,
|
||||||
String file = BlockReaderFactory.getFileName(targetAddr,
|
"test-blockpoolid", block.getBlockId())).
|
||||||
"test-blockpoolid",
|
setBlockToken(lblock.getBlockToken()).
|
||||||
block.getBlockId());
|
setStartOffset(0).
|
||||||
BlockReader blockReader =
|
setLength(-1).
|
||||||
BlockReaderFactory.newBlockReader(new DFSClient.Conf(conf), file, block,
|
setVerifyChecksum(true).
|
||||||
lblock.getBlockToken(), 0, -1, true, "TestDataNodeVolumeFailure",
|
setClientName("TestDataNodeVolumeFailure").
|
||||||
TcpPeerServer.peerFromSocket(s), datanode, null, null, null, false,
|
setDatanodeInfo(datanode).
|
||||||
CachingStrategy.newDefaultStrategy());
|
setCachingStrategy(CachingStrategy.newDefaultStrategy()).
|
||||||
|
setClientCacheContext(ClientContext.getFromConf(conf)).
|
||||||
|
setConfiguration(conf).
|
||||||
|
setRemotePeerFactory(new RemotePeerFactory() {
|
||||||
|
@Override
|
||||||
|
public Peer newConnectedPeer(InetSocketAddress addr)
|
||||||
|
throws IOException {
|
||||||
|
Peer peer = null;
|
||||||
|
Socket sock = NetUtils.getDefaultSocketFactory(conf).createSocket();
|
||||||
|
try {
|
||||||
|
sock.connect(addr, HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
|
||||||
|
peer = TcpPeerServer.peerFromSocket(sock);
|
||||||
|
} finally {
|
||||||
|
if (peer == null) {
|
||||||
|
IOUtils.closeSocket(sock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peer;
|
||||||
|
}
|
||||||
|
}).
|
||||||
|
build();
|
||||||
blockReader.close();
|
blockReader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext.DeduplicationMap;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestDeduplicationMap {
|
||||||
|
@Test
|
||||||
|
public void testDeduplicationMap() {
|
||||||
|
DeduplicationMap<String> m = DeduplicationMap.newMap();
|
||||||
|
Assert.assertEquals(1, m.getId("1"));
|
||||||
|
Assert.assertEquals(2, m.getId("2"));
|
||||||
|
Assert.assertEquals(3, m.getId("3"));
|
||||||
|
Assert.assertEquals(1, m.getId("1"));
|
||||||
|
Assert.assertEquals(2, m.getId("2"));
|
||||||
|
Assert.assertEquals(3, m.getId("3"));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,138 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
|
import org.apache.hadoop.hdfs.DFSOutputStream;
|
||||||
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
|
||||||
|
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFSImage {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPersist() throws IOException {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
testPersistHelper(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCompression() throws IOException {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
|
||||||
|
conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
|
||||||
|
"org.apache.hadoop.io.compress.GzipCodec");
|
||||||
|
testPersistHelper(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testPersistHelper(Configuration conf) throws IOException {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
FSNamesystem fsn = cluster.getNamesystem();
|
||||||
|
DistributedFileSystem fs = cluster.getFileSystem();
|
||||||
|
|
||||||
|
final Path dir = new Path("/abc/def");
|
||||||
|
final Path file1 = new Path(dir, "f1");
|
||||||
|
final Path file2 = new Path(dir, "f2");
|
||||||
|
|
||||||
|
// create an empty file f1
|
||||||
|
fs.create(file1).close();
|
||||||
|
|
||||||
|
// create an under-construction file f2
|
||||||
|
FSDataOutputStream out = fs.create(file2);
|
||||||
|
out.writeBytes("hello");
|
||||||
|
((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet
|
||||||
|
.of(SyncFlag.UPDATE_LENGTH));
|
||||||
|
|
||||||
|
// checkpoint
|
||||||
|
fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
|
fs.saveNamespace();
|
||||||
|
fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
|
||||||
|
cluster.restartNameNode();
|
||||||
|
cluster.waitActive();
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
|
||||||
|
assertTrue(fs.isDirectory(dir));
|
||||||
|
assertTrue(fs.exists(file1));
|
||||||
|
assertTrue(fs.exists(file2));
|
||||||
|
|
||||||
|
// check internals of file2
|
||||||
|
INodeFile file2Node = fsn.dir.getINode4Write(file2.toString()).asFile();
|
||||||
|
assertEquals("hello".length(), file2Node.computeFileSize());
|
||||||
|
assertTrue(file2Node.isUnderConstruction());
|
||||||
|
BlockInfo[] blks = file2Node.getBlocks();
|
||||||
|
assertEquals(1, blks.length);
|
||||||
|
assertEquals(BlockUCState.UNDER_CONSTRUCTION, blks[0].getBlockUCState());
|
||||||
|
// check lease manager
|
||||||
|
Lease lease = fsn.leaseManager.getLeaseByPath(file2.toString());
|
||||||
|
Assert.assertNotNull(lease);
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure that the digest written by the saver equals to the digest of the
|
||||||
|
* file.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDigest() throws IOException {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
|
||||||
|
DistributedFileSystem fs = cluster.getFileSystem();
|
||||||
|
fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
|
fs.saveNamespace();
|
||||||
|
fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
File currentDir = FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get(
|
||||||
|
0);
|
||||||
|
File fsimage = FSImageTestUtil.findNewestImageFile(currentDir
|
||||||
|
.getAbsolutePath());
|
||||||
|
assertEquals(MD5FileUtils.readStoredMd5ForFile(fsimage),
|
||||||
|
MD5FileUtils.computeMd5ForFile(fsimage));
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -27,17 +27,12 @@
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
|
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestFSImageStorageInspector {
|
public class TestFSImageStorageInspector {
|
||||||
private static final Log LOG = LogFactory.getLog(
|
|
||||||
TestFSImageStorageInspector.class);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple test with image, edits, and inprogress edits
|
* Simple test with image, edits, and inprogress edits
|
||||||
*/
|
*/
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue