From 54fd0e44b76c4b982dcfb47932b6159851f14136 Mon Sep 17 00:00:00 2001
From: Andrew Wang
Date: Mon, 8 May 2017 21:59:49 -0700
Subject: [PATCH] HDFS-11644. Support for querying outputstream capabilities.
 Contributed by Manoj Govindassamy.

---
 .../apache/hadoop/fs/FSDataOutputStream.java  | 10 ++-
 .../apache/hadoop/fs/StreamCapabilities.java  | 67 +++++++++++++++++++
 .../apache/hadoop/hdfs/DFSOutputStream.java   | 15 ++++-
 .../hadoop/hdfs/DFSStripedOutputStream.java   | 11 ++-
 .../hadoop/hdfs/TestDFSOutputStream.java      | 25 +++++++
 .../hdfs/TestDFSStripedOutputStream.java      | 24 ++++---
 .../fs/azure/SyncableDataOutputStream.java    | 12 +++-
 7 files changed, 150 insertions(+), 14 deletions(-)
 create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java
index 3f97ea89dd3..1d95cd3f713 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStream.java
@@ -30,7 +30,7 @@
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public class FSDataOutputStream extends DataOutputStream
-    implements Syncable, CanSetDropBehind {
+    implements Syncable, CanSetDropBehind, StreamCapabilities {
   private final OutputStream wrappedStream;
 
   private static class PositionCache extends FilterOutputStream {
@@ -111,6 +111,14 @@ public OutputStream getWrappedStream() {
     return wrappedStream;
   }
 
+  @Override
+  public boolean hasCapability(String capability) {
+    if (wrappedStream instanceof StreamCapabilities) {
+      return ((StreamCapabilities) wrappedStream).hasCapability(capability);
+    }
+    return false;
+  }
+
   @Override  // Syncable
   public void hflush() throws IOException {
     if (wrappedStream instanceof Syncable) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
new file mode 100644
index 00000000000..65aa67988a8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Interface to query streams for supported capabilities.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public interface StreamCapabilities {
+  /**
+   * Capabilities that a stream can support and be queried for.
+   */
+  enum StreamCapability {
+    /**
+     * Stream hflush capability to flush out the data in client's buffer.
+     * Streams with this capability implement {@link Syncable} and support
+     * {@link Syncable#hflush()}.
+     */
+    HFLUSH("hflush"),
+
+    /**
+     * Stream hsync capability to flush out the data in client's buffer all
+     * the way to the disk device. Streams with this capability implement
+     * {@link Syncable} and support {@link Syncable#hsync()}.
+     */
+    HSYNC("hsync");
+
+    private final String capability;
+
+    StreamCapability(String value) {
+      this.capability = value;
+    }
+
+    public final String getValue() {
+      return capability;
+    }
+  }
+
+  /**
+   * Query the stream for a specific capability.
+   *
+   * @param capability string to query the stream support for.
+   * @return true if the stream supports the capability.
+   */
+  boolean hasCapability(String capability);
+}
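
With StreamCapabilities in place, a caller can probe a stream before relying on Syncable semantics, instead of finding out at runtime that hflush()/hsync() do nothing. A minimal consumer-side sketch (not part of this patch; the FileSystem, Path, and payload are assumed to be supplied by the caller):

    import java.io.IOException;

    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.StreamCapabilities.StreamCapability;

    public class CapabilityAwareWriter {
      /** Write a payload and flush it as durably as the stream allows. */
      public static void writeDurably(FileSystem fs, Path path, byte[] data)
          throws IOException {
        try (FSDataOutputStream out = fs.create(path)) {
          out.write(data);
          if (out.hasCapability(StreamCapability.HSYNC.getValue())) {
            out.hsync();   // persisted to the DataNodes' disk devices
          } else if (out.hasCapability(StreamCapability.HFLUSH.getValue())) {
            out.hflush();  // visible to new readers, but maybe not on disk
          } else {
            out.flush();   // best effort only; no durability guarantee
          }
        }
      }
    }
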
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
index ceaefd80b14..83f1425a43d 100755
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hdfs;
 
+import static org.apache.hadoop.fs.StreamCapabilities.StreamCapability.HFLUSH;
+import static org.apache.hadoop.fs.StreamCapabilities.StreamCapability.HSYNC;
+
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InterruptedIOException;
@@ -34,6 +37,7 @@
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
+import org.apache.hadoop.fs.StreamCapabilities;
 import org.apache.hadoop.fs.Syncable;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
@@ -90,7 +94,7 @@
  ****************************************************************/
 @InterfaceAudience.Private
 public class DFSOutputStream extends FSOutputSummer
-    implements Syncable, CanSetDropBehind {
+    implements Syncable, CanSetDropBehind, StreamCapabilities {
   static final Logger LOG = LoggerFactory.getLogger(DFSOutputStream.class);
   /**
    * Number of times to retry creating a file when there are transient
@@ -546,6 +550,15 @@ void endBlock() throws IOException {
     }
   }
 
+  @Override
+  public boolean hasCapability(String capability) {
+    if (capability.equalsIgnoreCase(HSYNC.getValue()) ||
+        capability.equalsIgnoreCase(HFLUSH.getValue())) {
+      return true;
+    }
+    return false;
+  }
+
   /**
    * Flushes out to all replicas of the block. The data is in the buffers
    * of the DNs but not necessarily in the DN's OS buffers.
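
DFSOutputStream matches capability names case-insensitively against the canonical strings carried by the StreamCapability enum, so "HFLUSH" and "hflush" are equivalent queries. Any other Syncable stream can advertise its capabilities the same way; a hypothetical in-memory implementation, for illustration only:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.fs.StreamCapabilities;
    import org.apache.hadoop.fs.Syncable;

    import static org.apache.hadoop.fs.StreamCapabilities.StreamCapability.HFLUSH;
    import static org.apache.hadoop.fs.StreamCapabilities.StreamCapability.HSYNC;

    /** Toy stream showing the implementor side of the contract. */
    public class InMemorySyncableStream extends ByteArrayOutputStream
        implements Syncable, StreamCapabilities {

      @Override
      public boolean hasCapability(String capability) {
        // Advertise exactly the Syncable operations that really work,
        // comparing case-insensitively as DFSOutputStream does.
        return capability.equalsIgnoreCase(HFLUSH.getValue())
            || capability.equalsIgnoreCase(HSYNC.getValue());
      }

      @Override
      public void hflush() throws IOException {
        flush();  // nothing is buffered beyond the in-memory array
      }

      @Override
      public void hsync() throws IOException {
        flush();  // no device to sync to in this toy example
      }
    }
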
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
index 0fdae8c0f8f..2aa9e98c953 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.StreamCapabilities;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
@@ -77,8 +78,8 @@
  * Each stripe contains a sequence of cells.
  */
 @InterfaceAudience.Private
-public class DFSStripedOutputStream extends DFSOutputStream {
-
+public class DFSStripedOutputStream extends DFSOutputStream
+    implements StreamCapabilities {
   private static final ByteBufferPool BUFFER_POOL = new ElasticByteBufferPool();
 
   static class MultipleBlockingQueue<T> {
@@ -809,6 +810,12 @@ private int stripeDataSize() {
     return numDataBlocks * cellSize;
   }
 
+  @Override
+  public boolean hasCapability(String capability) {
+    // StreamCapabilities like hsync / hflush are not supported yet.
+    return false;
+  }
+
   @Override
   public void hflush() {
     // not supported yet
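
Since DFSStripedOutputStream answers false for both capabilities, applications that need durable flushes, such as HBase placing a write-ahead log, can now reject erasure-coded output streams up front instead of discovering that hflush()/hsync() are silent no-ops. A hypothetical preflight check along those lines:

    import java.io.IOException;

    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.StreamCapabilities.StreamCapability;

    public final class WalPreflight {
      private WalPreflight() {
      }

      /** Fail fast if a stream cannot make a write-ahead log durable. */
      public static void checkWalStream(FSDataOutputStream out)
          throws IOException {
        if (!out.hasCapability(StreamCapability.HSYNC.getValue())) {
          // e.g. a DFSStripedOutputStream on an erasure-coded directory
          throw new IOException(
              "Stream does not support hsync(); refusing to write a WAL");
        }
      }
    }
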
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java
index 52e3bb4f58d..f281a3b2120 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java
@@ -17,9 +17,11 @@
  */
 package org.apache.hadoop.hdfs;
 
+import java.io.ByteArrayInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.lang.reflect.Field;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
@@ -32,8 +34,10 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsTracer;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.StreamCapabilities.StreamCapability;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DataStreamer.LastExceptionInStreamer;
 import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
@@ -48,6 +52,7 @@
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.PathUtils;
 import org.apache.htrace.core.SpanId;
@@ -55,6 +60,8 @@
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.anyBoolean;
 import static org.mockito.Matchers.anyLong;
 import org.mockito.Mockito;
@@ -346,6 +353,24 @@ public void testEndLeaseCall() throws Exception {
     verify(spyClient, times(1)).endFileLease(anyLong());
   }
 
+  @Test
+  public void testStreamFlush() throws Exception {
+    FileSystem fs = cluster.getFileSystem();
+    FSDataOutputStream os = fs.create(new Path("/normal-file"));
+    // Verify output stream supports hsync() and hflush().
+    assertTrue("DFSOutputStream should support hflush()!",
+        os.hasCapability(StreamCapability.HFLUSH.getValue()));
+    assertTrue("DFSOutputStream should support hsync()!",
+        os.hasCapability(StreamCapability.HSYNC.getValue()));
+    byte[] bytes = new byte[1024];
+    InputStream is = new ByteArrayInputStream(bytes);
+    IOUtils.copyBytes(is, os, bytes.length);
+    os.hflush();
+    IOUtils.copyBytes(is, os, bytes.length);
+    os.hsync();
+    os.close();
+  }
+
   @AfterClass
   public static void tearDown() {
     if (cluster != null) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java
index 70309c95fe1..c0cfea22007 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java
@@ -17,7 +17,7 @@
  */
 package org.apache.hadoop.hdfs;
 
-import static org.apache.hadoop.fs.contract.ContractTestUtils.fail;
+import static org.junit.Assert.assertFalse;
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
@@ -29,6 +29,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.StreamCapabilities.StreamCapability;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.io.IOUtils;
@@ -195,14 +196,19 @@ public void testFileMoreThanABlockGroup3() throws Exception {
   public void testStreamFlush() throws Exception {
     final byte[] bytes = StripedFileTestUtil.generateBytes(blockSize *
         dataBlocks * 3 + cellSize * dataBlocks + cellSize + 123);
-    try (FSDataOutputStream os = fs.create(new Path("/ec-file-1"))) {
-      InputStream is = new ByteArrayInputStream(bytes);
-      IOUtils.copyBytes(is, os, bytes.length);
-      os.hflush();
-      os.hsync();
-    } catch (Exception e) {
-      fail("hflush()/hsync() on striped file output stream failed!", e);
-    }
+    FSDataOutputStream os = fs.create(new Path("/ec-file-1"));
+    assertFalse("DFSStripedOutputStream should not have hflush() " +
+        "capability yet!", os.hasCapability(
+        StreamCapability.HFLUSH.getValue()));
+    assertFalse("DFSStripedOutputStream should not have hsync() " +
+        "capability yet!", os.hasCapability(
+        StreamCapability.HSYNC.getValue()));
+    InputStream is = new ByteArrayInputStream(bytes);
+    IOUtils.copyBytes(is, os, bytes.length);
+    os.hflush();
+    IOUtils.copyBytes(is, os, bytes.length);
+    os.hsync();
+    os.close();
   }
 
   private void testOneFile(String src, int writeBytes) throws Exception {
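
The hadoop-azure change below uses the same delegation idiom as FSDataOutputStream above: a wrapper stream has no capabilities of its own, so it forwards the query to the stream it wraps and conservatively answers false when it cannot see through. The idiom in isolation (class name hypothetical):

    import java.io.FilterOutputStream;
    import java.io.OutputStream;

    import org.apache.hadoop.fs.StreamCapabilities;

    /** Generic wrapper that passes capability queries through. */
    public class ForwardingOutputStream extends FilterOutputStream
        implements StreamCapabilities {

      public ForwardingOutputStream(OutputStream out) {
        super(out);
      }

      @Override
      public boolean hasCapability(String capability) {
        if (out instanceof StreamCapabilities) {
          return ((StreamCapabilities) out).hasCapability(capability);
        }
        return false;  // unknown streams advertise nothing
      }
    }
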
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java
index 9bec7a5c2ce..a52fdb719ab 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.io.OutputStream;
 
+import org.apache.hadoop.fs.StreamCapabilities;
 import org.apache.hadoop.fs.Syncable;
 
 /**
@@ -30,12 +31,21 @@
  * wrapped stream passed in to the constructor. This is required
 * for HBase when wrapping a PageBlobOutputStream used as a write-ahead log.
 */
-public class SyncableDataOutputStream extends DataOutputStream implements Syncable {
+public class SyncableDataOutputStream extends DataOutputStream
+    implements Syncable, StreamCapabilities {
 
   public SyncableDataOutputStream(OutputStream out) {
     super(out);
   }
 
+  @Override
+  public boolean hasCapability(String capability) {
+    if (out instanceof StreamCapabilities) {
+      return ((StreamCapabilities) out).hasCapability(capability);
+    }
+    return false;
+  }
+
   @Override
   public void hflush() throws IOException {
     if (out instanceof Syncable) {