From a53b8b6fdce111b1e35ad0dc563eb53d1c58462f Mon Sep 17 00:00:00 2001 From: Kai Zheng Date: Wed, 9 Aug 2017 10:12:58 +0800 Subject: [PATCH] HDFS-11975. Provide a system-default EC policy. Contributed by Huichun Lu --- .../hadoop/hdfs/DistributedFileSystem.java | 2 -- .../ClientNamenodeProtocolTranslatorPB.java | 4 ++- .../src/main/proto/erasurecoding.proto | 2 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 +++ ...amenodeProtocolServerSideTranslatorPB.java | 4 ++- .../namenode/ErasureCodingPolicyManager.java | 12 +++++-- .../server/namenode/NameNodeRpcServer.java | 14 +++++++- .../org/apache/hadoop/hdfs/tools/ECAdmin.java | 14 ++++---- .../src/main/resources/hdfs-default.xml | 8 +++++ .../src/site/markdown/HDFSErasureCoding.md | 8 +++++ .../hdfs/TestErasureCodingPolicies.java | 24 +++++++++++-- .../namenode/TestEnabledECPolicies.java | 10 +++--- .../test/resources/testErasureCodingConf.xml | 35 +++++++++++++++++++ 13 files changed, 117 insertions(+), 24 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 13c5eb9dc7d..cd368d4de9a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -2515,8 +2515,6 @@ public class DistributedFileSystem extends FileSystem { public void setErasureCodingPolicy(final Path path, final String ecPolicyName) throws IOException { Path absF = fixRelativePart(path); - Preconditions.checkNotNull(ecPolicyName, "Erasure coding policy cannot be" + - " null."); new FileSystemLinkResolver() { @Override public Void doCall(final Path p) throws IOException { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 388788c89b9..aed41176e11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -1518,7 +1518,9 @@ public class ClientNamenodeProtocolTranslatorPB implements final SetErasureCodingPolicyRequestProto.Builder builder = SetErasureCodingPolicyRequestProto.newBuilder(); builder.setSrc(src); - builder.setEcPolicyName(ecPolicyName); + if (ecPolicyName != null) { + builder.setEcPolicyName(ecPolicyName); + } SetErasureCodingPolicyRequestProto req = builder.build(); try { rpcProxy.setErasureCodingPolicy(null, req); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto index 65baab65a47..9f803503c39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto @@ -25,7 +25,7 @@ import "hdfs.proto"; message SetErasureCodingPolicyRequestProto { required string src = 1; - required string ecPolicyName = 2; + optional string ecPolicyName = 2; } message SetErasureCodingPolicyResponseProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index d9568f2de38..dc9bf765b9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -564,6 +564,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT = ""; public static final String DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_KEY = "dfs.namenode.ec.policies.max.cellsize"; public static final int DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_DEFAULT = 4 * 1024 * 1024; + public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY = + "dfs.namenode.ec.system.default.policy"; + public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT = + "RS-6-3-64k"; public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_KEY = "dfs.datanode.ec.reconstruction.stripedread.threads"; public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_DEFAULT = 20; public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY = "dfs.datanode.ec.reconstruction.stripedread.buffer.size"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 4ac49fe12f7..38b81c68c65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -1488,7 +1488,9 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, SetErasureCodingPolicyRequestProto req) throws ServiceException { try { - server.setErasureCodingPolicy(req.getSrc(), req.getEcPolicyName()); + String ecPolicyName = req.hasEcPolicyName() ? 
+ req.getEcPolicyName() : null; + server.setErasureCodingPolicy(req.getSrc(), ecPolicyName); return SetErasureCodingPolicyResponseProto.newBuilder().build(); } catch (IOException e) { throw new ServiceException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java index 266d45cdc0b..18b8e8a9036 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -92,9 +93,14 @@ public final class ErasureCodingPolicyManager { public void init(Configuration conf) { // Populate the list of enabled policies from configuration - final String[] policyNames = conf.getTrimmedStrings( - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT); + final String[] enablePolicyNames = conf.getTrimmedStrings( + DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, + DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT); + final String defaultPolicyName = conf.getTrimmed( + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT); + final String[] policyNames = + (String[]) ArrayUtils.add(enablePolicyNames, defaultPolicyName); this.userPoliciesByID = new TreeMap<>(); this.userPoliciesByName = new TreeMap<>(); this.removedPoliciesByName = new TreeMap<>(); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 9265381b84c..d304d3dd293 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -251,13 +251,15 @@ public class NameNodeRpcServer implements NamenodeProtocols { private final String minimumDataNodeVersion; + private final String defaultECPolicyName; + public NameNodeRpcServer(Configuration conf, NameNode nn) throws IOException { this.nn = nn; this.namesystem = nn.getNamesystem(); this.retryCache = namesystem.getRetryCache(); this.metrics = NameNode.getNameNodeMetrics(); - + int handlerCount = conf.getInt(DFS_NAMENODE_HANDLER_COUNT_KEY, DFS_NAMENODE_HANDLER_COUNT_DEFAULT); @@ -490,6 +492,11 @@ public class NameNodeRpcServer implements NamenodeProtocols { DFSConfigKeys.DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY, DFSConfigKeys.DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_DEFAULT); + // Trimmed so the name matches the one ErasureCodingPolicyManager enables. + defaultECPolicyName = conf.getTrimmed( + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT); + // Set terse exception whose stack trace won't be logged clientRpcServer.addTerseExceptions(SafeModeException.class, FileNotFoundException.class, @@ -2055,6 +2062,12 @@ public class NameNodeRpcServer implements NamenodeProtocols { } boolean success = false; try { + if (ecPolicyName == null) { + ecPolicyName = defaultECPolicyName; + LOG.trace("No policy name is specified, " + + "set the default policy name instead"); + } + LOG.trace("Set erasure coding policy " + ecPolicyName + " on " + src); namesystem.setErasureCodingPolicy(src, ecPolicyName, cacheEntry != null); success = true; } finally { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java index 5006b5a20e0..46600a0f3ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java @@ -335,11 +335,6 @@ public class ECAdmin extends Configured implements Tool { final String ecPolicyName = StringUtils.popOptionWithArgument("-policy", args); - if (ecPolicyName == null) { - System.err.println("Please specify the policy name.\nUsage: " + - getLongUsage()); - return 1; - } if (args.size() > 0) { System.err.println(getName() + ": Too many arguments"); @@ -350,8 +345,13 @@ public class ECAdmin extends Configured implements Tool { final DistributedFileSystem dfs = AdminHelper.getDFS(p.toUri(), conf); try { dfs.setErasureCodingPolicy(p, ecPolicyName); - System.out.println("Set erasure coding policy " + ecPolicyName + - " on " + path); + if (ecPolicyName == null){ + System.out.println("Set default erasure coding policy" + + " on " + path); + } else { + System.out.println("Set erasure coding policy " + ecPolicyName + + " on " + path); + } } catch (Exception e) { System.err.println(AdminHelper.prettifyException(e)); return 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index bb623598ac6..49429672cc4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2975,6 +2975,14 @@ + + dfs.namenode.ec.system.default.policy + RS-6-3-64k + The default erasure coding policy name will be used + on the path if no policy name is passed. 
+ + + dfs.namenode.ec.policies.max.cellsize 4194304 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md index 88293ba63a5..4a48c2ad082 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md @@ -117,6 +117,11 @@ Deployment be more appropriate. If the administrator only cares about node-level fault-tolerance, `RS-10-4-64k` would still be appropriate as long as there are at least 14 DataNodes in the cluster. + A system default EC policy can be configured via the `dfs.namenode.ec.system.default.policy` configuration. With this configuration, + the default EC policy will be used when no policy name is passed as an argument in the `-setPolicy` command. + + By default, `dfs.namenode.ec.system.default.policy` is `RS-6-3-64k`. + The codec implementations for Reed-Solomon and XOR can be configured with the following client and DataNode configuration keys: `io.erasurecode.codec.rs.rawcoders` for the default RS codec, `io.erasurecode.codec.rs-legacy.rawcoders` for the legacy RS codec, @@ -167,6 +172,9 @@ Below are the details about each command. `path`: An directory in HDFS. This is a mandatory parameter. Setting a policy only affects newly created files, and does not affect existing files. `policyName`: The erasure coding policy to be used for files under this directory. + This parameter can be omitted, in which case the policy named by the `dfs.namenode.ec.system.default.policy` configuration is used. + The EC policy of the path will then be set to that configured default value. 
+ * `[-getPolicy -path ]` diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java index 127dad1e03e..06edb1a4d72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java @@ -209,9 +209,9 @@ public class TestErasureCodingPolicies { cluster.restartNameNodes(); cluster.waitActive(); - // No policies should be enabled after restart - Assert.assertTrue("No policies should be enabled after restart", - fs.getAllErasureCodingPolicies().isEmpty()); + // Only default policy should be enabled after restart + Assert.assertEquals("Only default policy should be enabled after restart", + 1, fs.getAllErasureCodingPolicies().size()); // Already set directory-level policies should still be in effect Path disabledPolicy = new Path(dir1, "afterDisabled"); @@ -359,6 +359,24 @@ public class TestErasureCodingPolicies { } } + @Test + public void testSetDefaultPolicy() + throws IOException { + // A null policy name asks for the configured system default policy. + // Exceptions are deliberately not caught here: swallowing them would + // let the test pass without the assertion ever being reached. + String src = "/ecDir"; + final Path ecDir = new Path(src); + fs.mkdir(ecDir, FsPermission.getDirDefault()); + fs.getClient().setErasureCodingPolicy(src, null); + String actualECPolicyName = fs.getClient(). 
+ getErasureCodingPolicy(src).getName(); + String expectedECPolicyName = + conf.get(DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT); + assertEquals(expectedECPolicyName, actualECPolicyName); + } + @Test public void testGetAllErasureCodingPolicies() throws Exception { Collection allECPolicies = fs diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java index fe95734cb37..d769f8bc6b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java @@ -75,7 +75,7 @@ public class TestEnabledECPolicies { String defaultECPolicies = conf.get( DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT); - expectValidPolicy(defaultECPolicies, 0); + expectValidPolicy(defaultECPolicies, 1); } @Test @@ -98,10 +98,10 @@ public class TestEnabledECPolicies { String ecPolicyName = StripedFileTestUtil.getDefaultECPolicy().getName(); expectValidPolicy(ecPolicyName, 1); expectValidPolicy(ecPolicyName + ", ", 1); - expectValidPolicy(",", 0); + expectValidPolicy(",", 1); expectValidPolicy(", " + ecPolicyName, 1); - expectValidPolicy(" ", 0); - expectValidPolicy(" , ", 0); + expectValidPolicy(" ", 1); + expectValidPolicy(" , ", 1); } @Test @@ -147,7 +147,7 @@ public class TestEnabledECPolicies { Assert.assertTrue("Did not find specified EC policy " + p.getName(), found.contains(p.getName())); } - Assert.assertEquals(enabledPolicies.length, found.size()); + Assert.assertEquals(enabledPolicies.length, found.size()-1); // Check that getEnabledPolicyByName only returns enabled policies for 
(ErasureCodingPolicy p: SystemErasureCodingPolicies.getPolicies()) { if (found.contains(p.getName())) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml index 127effc616b..c68c6d653d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml @@ -552,6 +552,41 @@ + + setPolicy : set erasure coding policy without giving a specific policy name + + -fs NAMENODE -mkdir /ecdir + -fs NAMENODE -setPolicy -path /ecdir + + + -fs NAMENODE -rmdir /ecdir + + + + SubstringComparator + Set default erasure coding policy on /ecdir + + + + + + getPolicy: get the default policy after setPolicy without giving a specific policy name + + -fs NAMENODE -mkdir /ecdir + -fs NAMENODE -setPolicy -path /ecdir + -fs NAMENODE -getPolicy -path /ecdir + + + -fs NAMENODE -rmdir /ecdir + + + + SubstringComparator + RS-6-3-64k + + + + getPolicy : illegal parameters - path is missing