From b4c23043d38325aad8af4488caca115eae789f07 Mon Sep 17 00:00:00 2001 From: Thomas Marquardt Date: Fri, 14 Sep 2018 22:34:19 +0000 Subject: [PATCH] HADOOP-15757. ABFS: remove dependency on common-codec Base64. Contributed by Da Zhou. --- .../src/config/checkstyle-suppressions.xml | 2 + .../fs/azurebfs/AzureBlobFileSystemStore.java | 6 +- ...se64StringConfigurationBasicValidator.java | 4 +- .../services/SharedKeyCredentials.java | 9 +- .../hadoop/fs/azurebfs/utils/Base64.java | 329 ++++++++++++++++++ ...TestAbfsConfigurationFieldsValidation.java | 7 +- .../TestConfigurationValidators.java | 7 +- 7 files changed, 344 insertions(+), 20 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Base64.java diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index 751a22773d7..10cf77e0c2b 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -44,4 +44,6 @@ + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index e3bea3251d0..d16cf3622bb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -41,7 +41,6 @@ import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.Set; -import javax.xml.bind.DatatypeConverter; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -77,6 +76,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; import org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials; +import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; @@ -865,7 +865,7 @@ public class AzureBlobFileSystemStore { throw new CharacterCodingException(); } - String encodedPropertyValue = DatatypeConverter.printBase64Binary(encoder.encode(CharBuffer.wrap(value)).array()); + String encodedPropertyValue = Base64.encode(encoder.encode(CharBuffer.wrap(value)).array()); commaSeparatedProperties.append(key) .append(AbfsHttpConstants.EQUAL) .append(encodedPropertyValue); @@ -903,7 +903,7 @@ public class AzureBlobFileSystemStore { throw new InvalidFileSystemPropertyException(xMsProperties); } - byte[] decodedValue = DatatypeConverter.parseBase64Binary(nameValue[1]); + byte[] decodedValue = Base64.decode(nameValue[1]); final String value; try { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/Base64StringConfigurationBasicValidator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/Base64StringConfigurationBasicValidator.java index 6bb997bbc79..fc7d713cb41 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/Base64StringConfigurationBasicValidator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/diagnostics/Base64StringConfigurationBasicValidator.java @@ -23,7 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.azurebfs.contracts.diagnostics.ConfigurationValidator; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; -import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.fs.azurebfs.utils.Base64; /** * String Base64 configuration value Validator. @@ -42,7 +42,7 @@ public class Base64StringConfigurationBasicValidator extends ConfigurationBasicV return result; } - if (!Base64.isBase64(configValue)) { + if (!Base64.validateIsBase64String(configValue)) { throw new InvalidConfigurationValueException(getConfigKey()); } return configValue; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java index 105a1a2dbf6..9ab9e504506 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java @@ -39,10 +39,9 @@ import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.codec.Charsets; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.utils.Base64; /** * Represents the shared key credentials used to access an Azure Storage @@ -52,8 +51,6 @@ public class SharedKeyCredentials { private static final int EXPECTED_BLOB_QUEUE_CANONICALIZED_STRING_LENGTH = 300; private static final Pattern CRLF = Pattern.compile("\r\n", Pattern.LITERAL); private static final String HMAC_SHA256 = "HmacSHA256"; - private static final Base64 BASE_64 = new Base64(); - /** * Stores a reference to the RFC1123 date/time pattern. */ @@ -73,7 +70,7 @@ public class SharedKeyCredentials { throw new IllegalArgumentException("Invalid account key."); } this.accountName = accountName; - this.accountKey = BASE_64.decode(accountKey); + this.accountKey = Base64.decode(accountKey); initializeMac(); } @@ -100,7 +97,7 @@ public class SharedKeyCredentials { synchronized (this) { hmac = hmacSha256.doFinal(utf8Bytes); } - return new String(BASE_64.encode(hmac), Charsets.UTF_8); + return Base64.encode(hmac); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Base64.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Base64.java new file mode 100644 index 00000000000..c1910060420 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Base64.java @@ -0,0 +1,329 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +/** + * Base64 + */ +public final class Base64 { + /** + * The Base 64 Characters. + */ + private static final String BASE_64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + /** + * Decoded values, -1 is invalid character, -2 is = pad character. + */ + private static final byte[] DECODE_64 = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0-15 + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* + * 16- 31 + */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* + * 32- 47 + */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1, /* + * 48- 63 + */ + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 64-79 */ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* + * 80- 95 + */ + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* + * 96- 111 + */ + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1 /* + * 112- 127 + */ + }; + + /** + * Decodes a given Base64 string into its corresponding byte array. + * + * @param data + * the Base64 string, as a String object, to decode + * + * @return the corresponding decoded byte array + * @throws IllegalArgumentException + * If the string is not a valid base64 encoded string + */ + public static byte[] decode(final String data) { + if (data == null) { + throw new IllegalArgumentException("The data parameter is not a valid base64-encoded string."); + } + + int byteArrayLength = 3 * data.length() / 4; + + if (data.endsWith("==")) { + byteArrayLength -= 2; + } + else if (data.endsWith("=")) { + byteArrayLength -= 1; + } + + final byte[] retArray = new byte[byteArrayLength]; + int byteDex = 0; + int charDex = 0; + + for (; charDex < data.length(); charDex += 4) { + // get 4 chars, convert to 3 bytes + final int char1 = DECODE_64[(byte) data.charAt(charDex)]; + final int char2 = DECODE_64[(byte) data.charAt(charDex + 1)]; + final int char3 = DECODE_64[(byte) data.charAt(charDex + 2)]; + final int char4 = DECODE_64[(byte) data.charAt(charDex + 3)]; + + if (char1 < 0 || char2 < 0 || char3 == -1 || char4 == -1) { + // invalid character(-1), or bad padding (-2) + throw new IllegalArgumentException("The data parameter is not a valid base64-encoded string."); + } + + int tVal = char1 << 18; + tVal += char2 << 12; + tVal += (char3 & 0xff) << 6; + tVal += char4 & 0xff; + + if (char3 == -2) { + // two "==" pad chars, check bits 12-24 + tVal &= 0x00FFF000; + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + } + else if (char4 == -2) { + // one pad char "=" , check bits 6-24. + tVal &= 0x00FFFFC0; + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + retArray[byteDex++] = (byte) (tVal >> 8 & 0xFF); + + } + else { + // No pads take all 3 bytes, bits 0-24 + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + retArray[byteDex++] = (byte) (tVal >> 8 & 0xFF); + retArray[byteDex++] = (byte) (tVal & 0xFF); + } + } + return retArray; + } + + /** + * Decodes a given Base64 string into its corresponding byte array. + * + * @param data + * the Base64 string, as a String object, to decode + * + * @return the corresponding decoded byte array + * @throws IllegalArgumentException + * If the string is not a valid base64 encoded string + */ + public static Byte[] decodeAsByteObjectArray(final String data) { + int byteArrayLength = 3 * data.length() / 4; + + if (data.endsWith("==")) { + byteArrayLength -= 2; + } + else if (data.endsWith("=")) { + byteArrayLength -= 1; + } + + final Byte[] retArray = new Byte[byteArrayLength]; + int byteDex = 0; + int charDex = 0; + + for (; charDex < data.length(); charDex += 4) { + // get 4 chars, convert to 3 bytes + final int char1 = DECODE_64[(byte) data.charAt(charDex)]; + final int char2 = DECODE_64[(byte) data.charAt(charDex + 1)]; + final int char3 = DECODE_64[(byte) data.charAt(charDex + 2)]; + final int char4 = DECODE_64[(byte) data.charAt(charDex + 3)]; + + if (char1 < 0 || char2 < 0 || char3 == -1 || char4 == -1) { + // invalid character(-1), or bad padding (-2) + throw new IllegalArgumentException("The data parameter is not a valid base64-encoded string."); + } + + int tVal = char1 << 18; + tVal += char2 << 12; + tVal += (char3 & 0xff) << 6; + tVal += char4 & 0xff; + + if (char3 == -2) { + // two "==" pad chars, check bits 12-24 + tVal &= 0x00FFF000; + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + } + else if (char4 == -2) { + // one pad char "=" , check bits 6-24. + tVal &= 0x00FFFFC0; + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + retArray[byteDex++] = (byte) (tVal >> 8 & 0xFF); + + } + else { + // No pads take all 3 bytes, bits 0-24 + retArray[byteDex++] = (byte) (tVal >> 16 & 0xFF); + retArray[byteDex++] = (byte) (tVal >> 8 & 0xFF); + retArray[byteDex++] = (byte) (tVal & 0xFF); + } + } + return retArray; + } + + /** + * Encodes a byte array as a Base64 string. + * + * @param data + * the byte array to encode + * @return the Base64-encoded string, as a String object + */ + public static String encode(final byte[] data) { + final StringBuilder builder = new StringBuilder(); + final int dataRemainder = data.length % 3; + + int j = 0; + int n = 0; + for (; j < data.length; j += 3) { + + if (j < data.length - dataRemainder) { + n = ((data[j] & 0xFF) << 16) + ((data[j + 1] & 0xFF) << 8) + (data[j + 2] & 0xFF); + } + else { + if (dataRemainder == 1) { + n = (data[j] & 0xFF) << 16; + } + else if (dataRemainder == 2) { + n = ((data[j] & 0xFF) << 16) + ((data[j + 1] & 0xFF) << 8); + } + } + + // Left here for readability + // byte char1 = (byte) ((n >>> 18) & 0x3F); + // byte char2 = (byte) ((n >>> 12) & 0x3F); + // byte char3 = (byte) ((n >>> 6) & 0x3F); + // byte char4 = (byte) (n & 0x3F); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 18) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 12) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 6) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) (n & 0x3F))); + } + + final int bLength = builder.length(); + + // append '=' to pad + if (data.length % 3 == 1) { + builder.replace(bLength - 2, bLength, "=="); + } + else if (data.length % 3 == 2) { + builder.replace(bLength - 1, bLength, "="); + } + + return builder.toString(); + } + + /** + * Encodes a byte array as a Base64 string. + * + * @param data + * the byte array to encode + * @return the Base64-encoded string, as a String object + */ + public static String encode(final Byte[] data) { + final StringBuilder builder = new StringBuilder(); + final int dataRemainder = data.length % 3; + + int j = 0; + int n = 0; + for (; j < data.length; j += 3) { + + if (j < data.length - dataRemainder) { + n = ((data[j] & 0xFF) << 16) + ((data[j + 1] & 0xFF) << 8) + (data[j + 2] & 0xFF); + } + else { + if (dataRemainder == 1) { + n = (data[j] & 0xFF) << 16; + } + else if (dataRemainder == 2) { + n = ((data[j] & 0xFF) << 16) + ((data[j + 1] & 0xFF) << 8); + } + } + + // Left here for readability + // byte char1 = (byte) ((n >>> 18) & 0x3F); + // byte char2 = (byte) ((n >>> 12) & 0x3F); + // byte char3 = (byte) ((n >>> 6) & 0x3F); + // byte char4 = (byte) (n & 0x3F); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 18) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 12) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) ((n >>> 6) & 0x3F))); + builder.append(BASE_64_CHARS.charAt((byte) (n & 0x3F))); + } + + final int bLength = builder.length(); + + // append '=' to pad + if (data.length % 3 == 1) { + builder.replace(bLength - 2, bLength, "=="); + } + else if (data.length % 3 == 2) { + builder.replace(bLength - 1, bLength, "="); + } + + return builder.toString(); + } + + /** + * Determines whether the given string contains only Base64 characters. + * + * @param data + * the string, as a String object, to validate + * @return true if data is a valid Base64 string, otherwise false + */ + public static boolean validateIsBase64String(final String data) { + + if (data == null || data.length() % 4 != 0) { + return false; + } + + for (int m = 0; m < data.length(); m++) { + final byte charByte = (byte) data.charAt(m); + + // pad char detected + if (DECODE_64[charByte] == -2) { + if (m < data.length() - 2) { + return false; + } + else if (m == data.length() - 2 && DECODE_64[(byte) data.charAt(m + 1)] != -2) { + return false; + } + } + + if (charByte < 0 || DECODE_64[charByte] == -1) { + return false; + } + } + + return true; + } + + /** + * Private Default Ctor. + */ + private Base64() { + // No op + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java index 50f7bd98bf3..eeed6cec872 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsConfigurationFieldsValidation.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidati import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.LongConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.annotations.ConfigurationValidationAnnotations.Base64StringConfigurationValidatorAnnotation; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConfigurationPropertyNotFoundException; +import org.apache.hadoop.fs.azurebfs.utils.Base64; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SSL_CHANNEL_MODE_KEY; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_BUFFER_SIZE; @@ -42,7 +43,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.D import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_AZURE_BLOCK_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.AZURE_BLOCK_LOCATION_HOST_DEFAULT; -import org.apache.commons.codec.binary.Base64; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; @@ -97,10 +97,9 @@ public class TestAbfsConfigurationFieldsValidation { public TestAbfsConfigurationFieldsValidation() throws Exception { super(); - Base64 base64 = new Base64(); this.accountName = "testaccount1.blob.core.windows.net"; - this.encodedString = new String(base64.encode("base64Value".getBytes(Charsets.UTF_8)), Charsets.UTF_8); - this.encodedAccountKey = new String(base64.encode("someAccountKey".getBytes(Charsets.UTF_8)), Charsets.UTF_8); + this.encodedString = Base64.encode("base64Value".getBytes(Charsets.UTF_8)); + this.encodedAccountKey = Base64.encode("someAccountKey".getBytes(Charsets.UTF_8)); Configuration configuration = new Configuration(); configuration.addResource(TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME); configuration.set(INT_KEY, "1234565"); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java index a9556ab1c0f..f02eadc9a04 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/diagnostics/TestConfigurationValidators.java @@ -18,14 +18,11 @@ package org.apache.hadoop.fs.azurebfs.diagnostics; -import org.apache.commons.codec.Charsets; - import org.junit.Assert; import org.junit.Test; - import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; -import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.fs.azurebfs.utils.Base64; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MAX_BUFFER_SIZE; @@ -109,7 +106,7 @@ public class TestConfigurationValidators extends Assert { @Test public void testBase64StringConfigValidator() throws Exception { - String encodedVal = new String(new Base64().encode("someValue".getBytes()), Charsets.UTF_8); + String encodedVal = Base64.encode("someValue".getBytes()); Base64StringConfigurationBasicValidator base64StringConfigurationValidator = new Base64StringConfigurationBasicValidator(FAKE_KEY, "", false); assertEquals("", base64StringConfigurationValidator.validate(null));