From 67204f2834b2e72a85e014087db1fcb7d5e9cdce Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Thu, 4 Aug 2016 09:45:03 -0500
Subject: [PATCH] HDFS-10662. Optimize UTF8 string/byte conversions.
 Contributed by Daryn Sharp.

(cherry picked from commit 6ae39199dac6ac7be6802b31452552c76da16e24)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java

(cherry picked from commit 77b61d1f4e34de9fed3dd6167fa93bb35e85e038)
(cherry picked from commit 58df27b87388dbb9729862a58dcdbe0b57e8b1ab)
---
 .../java/org/apache/hadoop/hdfs/DFSUtil.java   | 24 +++++++++++++------
 .../hdfs/server/namenode/FSDirMkdirOp.java     |  5 ++--
 .../namenode/FSDirStatAndListingOp.java        |  3 +--
 .../hdfs/server/namenode/FSDirXAttrOp.java     |  4 ++--
 .../hdfs/server/namenode/FSDirectory.java      |  3 +--
 5 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
index a60ad8e76df..bbe11bcade4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
@@ -37,7 +37,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYPASSWORD_
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_PASSWORD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_TRUSTSTORE_PASSWORD_KEY;
 
-import com.google.common.base.Charsets;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
@@ -45,6 +44,7 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
 import java.security.SecureRandom;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
@@ -313,7 +313,11 @@ public class DFSUtil {
   public static String bytes2String(byte[] bytes) {
     return bytes2String(bytes, 0, bytes.length);
   }
-  
+
+  // Using the charset canonical name for String/byte[] conversions is much
+  // more efficient due to use of cached encoders/decoders.
+  private static final String UTF8_CSN = StandardCharsets.UTF_8.name();
+
   /**
    * Decode a specific range of bytes of the given byte array to a string
    * using UTF8.
@@ -325,18 +329,24 @@
    */
   public static String bytes2String(byte[] bytes, int offset, int length) {
     try {
-      return new String(bytes, offset, length, "UTF8");
-    } catch(UnsupportedEncodingException e) {
-      assert false : "UTF8 encoding is not supported ";
+      return new String(bytes, offset, length, UTF8_CSN);
+    } catch (UnsupportedEncodingException e) {
+      // should never happen!
+      throw new IllegalArgumentException("UTF8 encoding is not supported", e);
     }
-    return null;
   }
 
   /**
    * Converts a string to a byte array using UTF8 encoding.
    */
   public static byte[] string2Bytes(String str) {
-    return str.getBytes(Charsets.UTF_8);
+    try {
+      return str.getBytes(UTF8_CSN);
+    } catch (UnsupportedEncodingException e) {
+      // should never happen!
+      throw new IllegalArgumentException("UTF8 decoding is not supported", e);
+    }
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java
index f51427f2b30..c74facaf6a7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import com.google.common.base.Preconditions;
-import org.apache.commons.io.Charsets;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.InvalidPathException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
@@ -121,7 +120,7 @@ class FSDirMkdirOp {
   static Map.Entry<INodesInPath, String> createAncestorDirectories(
       FSDirectory fsd, INodesInPath iip, PermissionStatus permission)
       throws IOException {
-    final String last = new String(iip.getLastLocalName(), Charsets.UTF_8);
+    final String last = DFSUtil.bytes2String(iip.getLastLocalName());
     INodesInPath existing = iip.getExistingINodes();
     List<String> children = iip.getPath(existing.length(),
         iip.length() - existing.length());
@@ -189,7 +188,7 @@
       throws IOException {
     assert fsd.hasWriteLock();
     existing = unprotectedMkdir(fsd, fsd.allocateNewInodeId(), existing,
-        localName.getBytes(Charsets.UTF_8), perm, null, now());
+        DFSUtil.string2Bytes(localName), perm, null, now());
     if (existing == null) {
       return null;
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java
index 850b3bd6c51..7cf69d2ce9f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import com.google.common.base.Preconditions;
-import org.apache.commons.io.Charsets;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
 import org.apache.hadoop.fs.FileEncryptionInfo;
@@ -51,7 +50,7 @@ class FSDirStatAndListingOp {
     FSPermissionChecker pc = fsd.getPermissionChecker();
     byte[][] pathComponents = FSDirectory
         .getPathComponentsForReservedPath(srcArg);
-    final String startAfterString = new String(startAfter, Charsets.UTF_8);
+    final String startAfterString = DFSUtil.bytes2String(startAfter);
     final String src = fsd.resolvePath(pc, srcArg, pathComponents);
     final INodesInPath iip = fsd.getINodesInPath(src, true);
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
index 4d97872c51f..093d2711eb2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.HadoopIllegalArgumentException;
@@ -26,6 +25,7 @@ import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.fs.XAttrSetFlag;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.XAttrHelper;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
@@ -421,7 +421,7 @@ class FSDirXAttrOp {
     if (fsd.getXattrMaxSize() == 0) {
       return;
     }
-    int size = xAttr.getName().getBytes(Charsets.UTF_8).length;
+    int size = DFSUtil.string2Bytes(xAttr.getName()).length;
     if (xAttr.getValue() != null) {
       size += xAttr.getValue().length;
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index 7f8b1857bef..20b060f7a4c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -21,7 +21,6 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.protobuf.InvalidProtocolBufferException;
-import org.apache.commons.io.Charsets;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -417,7 +416,7 @@ public class FSDirectory implements Closeable {
     long modTime = now();
     INodeFile newNode = newINodeFile(allocateNewInodeId(), permissions,
         modTime, modTime, replication, preferredBlockSize);
-    newNode.setLocalName(localName.getBytes(Charsets.UTF_8));
+    newNode.setLocalName(DFSUtil.string2Bytes(localName));
     newNode.toUnderConstruction(clientName, clientMachine);
 
     INodesInPath newiip;
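
The "cached encoders/decoders" comment added to DFSUtil.java points at a JDK detail: on the JDK 7/8 releases that Hadoop 2.x targets, String.getBytes(String charsetName) and new String(bytes, charsetName) go through java.lang.StringCoding, which caches the resolved encoder/decoder per thread, while the Charset-object overloads build a fresh encoder/decoder on every call. The standalone sketch below (class and method names are illustrative only, not part of this patch) contrasts the two conversion paths that the new UTF8_CSN constant chooses between; it is a minimal illustration under those assumptions, not the NameNode code itself.

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

public class Utf8ConversionSketch {

  // Canonical charset name, mirroring the UTF8_CSN constant added by the patch.
  private static final String UTF8_CSN = StandardCharsets.UTF_8.name();

  // Path taken by the patched code: the charset is resolved by name, so
  // JDK 7/8 can reuse a cached per-thread encoder across calls.
  static byte[] encodeByName(String s) {
    try {
      return s.getBytes(UTF8_CSN);
    } catch (UnsupportedEncodingException e) {
      // UTF-8 is a mandatory charset on every JVM, so this cannot happen.
      throw new IllegalArgumentException("UTF8 encoding is not supported", e);
    }
  }

  // Path taken by the replaced code: a Charset object is passed, and
  // JDK 7/8 allocates a new encoder for each call.
  static byte[] encodeByCharset(String s) {
    return s.getBytes(StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    String path = "/user/example/file";
    // Both paths produce identical bytes; only their per-call overhead differs.
    System.out.println(encodeByName(path).length == encodeByCharset(path).length);
  }
}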