From ff5ec3b841612f7f28ad8be5bbfec4168a8ac1f0 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Thu, 4 May 2017 17:27:44 -0700 Subject: [PATCH] HADOOP-14383. Implement FileSystem that reads from HTTP / HTTPS endpoints. --- hadoop-common-project/hadoop-common/pom.xml | 5 + .../fs/http/AbstractHttpFileSystem.java | 153 ++++++++++++++++++ .../apache/hadoop/fs/http/HttpFileSystem.java | 28 ++++ .../hadoop/fs/http/HttpsFileSystem.java | 28 ++++ .../apache/hadoop/fs/http/package-info.java | 23 +++ .../services/org.apache.hadoop.fs.FileSystem | 2 + .../hadoop/fs/http/TestHttpFileSystem.java | 67 ++++++++ hadoop-project/pom.xml | 6 + 8 files changed, 312 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpFileSystem.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpsFileSystem.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/package-info.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index f76575d31cb..e8b53174039 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -318,6 +318,11 @@ aalto-xml compile + + com.squareup.okhttp3 + mockwebserver + test + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java new file mode 100644 index 00000000000..fa0b2cf6c31 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/AbstractHttpFileSystem.java @@ -0,0 +1,153 @@ 
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.http;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.Progressable;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URLConnection;
+
+/**
+ * Common base of the file systems that read from HTTP and HTTPS endpoints.
+ * <p>
+ * Only {@link #open(Path, int)} transfers data. {@code create},
+ * {@code append}, {@code rename}, {@code delete} and {@code listStatus}
+ * throw {@link UnsupportedOperationException}, {@code mkdirs} always
+ * returns {@code false}, and {@link #getFileStatus(Path)} answers with a
+ * placeholder status without contacting the server.
+ */
+abstract class AbstractHttpFileSystem extends FileSystem {
+  /** Block size reported in the placeholder {@link FileStatus}. */
+  private static final long DEFAULT_BLOCK_SIZE = 4096;
+  /** Fixed working directory; it is never changed after class load. */
+  private static final Path WORKING_DIR = new Path("/");
+
+  /** URI handed to {@link #initialize(URI, Configuration)}. */
+  private URI uri;
+
+  /**
+   * Initializes the superclass and remembers the file system URI.
+   *
+   * @param name URI of this file system, later returned by
+   *             {@link #getUri()}
+   * @param conf configuration forwarded to the superclass
+   * @throws IOException if the superclass initialization fails
+   */
+  @Override
+  public void initialize(URI name, Configuration conf) throws IOException {
+    super.initialize(name, conf);
+    this.uri = name;
+  }
+
+  /**
+   * @return the URI scheme served by the concrete subclass
+   */
+  public abstract String getScheme();
+
+  /**
+   * @return the URI given to {@link #initialize(URI, Configuration)}, or
+   *         {@code null} if the instance has not been initialized
+   */
+  @Override
+  public URI getUri() {
+    return uri;
+  }
+
+  /**
+   * Opens a URL connection to {@code path} and wraps the response body.
+   *
+   * @param path       location to read; it is qualified against this file
+   *                   system before being converted to a URL
+   * @param bufferSize ignored by this implementation
+   * @return a forward-only stream over the response body; its seek,
+   *         position and positioned-read methods throw
+   *         {@link UnsupportedOperationException}
+   * @throws IOException if the connection cannot be opened or its input
+   *                     stream cannot be obtained
+   */
+  @Override
+  public FSDataInputStream open(Path path, int bufferSize) throws IOException {
+    // Qualify first: a relative or scheme-less Path yields a non-absolute
+    // URI, and URI.toURL() rejects those with IllegalArgumentException.
+    URI pathUri = makeQualified(path).toUri();
+    URLConnection conn = pathUri.toURL().openConnection();
+    InputStream in = conn.getInputStream();
+    return new FSDataInputStream(new HttpDataInputStream(in));
+  }
+
+  /**
+   * Not supported; this file system cannot create files.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public FSDataOutputStream create(Path path, FsPermission fsPermission,
+                                   boolean b, int i, short i1, long l,
+                                   Progressable progressable)
+      throws IOException {
+    throw unsupported("create");
+  }
+
+  /**
+   * Not supported; this file system cannot append to files.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public FSDataOutputStream append(Path path, int i, Progressable progressable)
+      throws IOException {
+    throw unsupported("append");
+  }
+
+  /**
+   * Not supported; this file system cannot rename files.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public boolean rename(Path path, Path path1) throws IOException {
+    throw unsupported("rename");
+  }
+
+  /**
+   * Not supported; this file system cannot delete files.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public boolean delete(Path path, boolean b) throws IOException {
+    throw unsupported("delete");
+  }
+
+  /**
+   * Not supported; this file system cannot list directories.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public FileStatus[] listStatus(Path path) throws IOException {
+    throw unsupported("listStatus");
+  }
+
+  /**
+   * Does nothing; the working directory is always the root.
+   *
+   * @param path ignored
+   */
+  @Override
+  public void setWorkingDirectory(Path path) {
+  }
+
+  /**
+   * @return the root path {@code /}
+   */
+  @Override
+  public Path getWorkingDirectory() {
+    return WORKING_DIR;
+  }
+
+  /**
+   * Creates nothing.
+   *
+   * @return {@code false} always
+   */
+  @Override
+  public boolean mkdirs(Path path, FsPermission fsPermission)
+      throws IOException {
+    return false;
+  }
+
+  /**
+   * Builds a placeholder status without contacting the server.
+   *
+   * @param path location to describe
+   * @return a status for the qualified {@code path} with length {@code -1},
+   *         the directory flag unset, replication {@code 1}, a block size
+   *         of {@code 4096} and modification time {@code 0}
+   */
+  @Override
+  public FileStatus getFileStatus(Path path) throws IOException {
+    // Report the fully qualified path so the status carries the scheme and
+    // authority of this file system even when the caller passed a bare path.
+    return new FileStatus(-1, false, 1, DEFAULT_BLOCK_SIZE, 0,
+        makeQualified(path));
+  }
+
+  /** Builds the exception thrown by every operation that is rejected. */
+  private static UnsupportedOperationException unsupported(String operation) {
+    return new UnsupportedOperationException(
+        operation + " is not supported by HTTP file systems");
+  }
+
+  /**
+   * Forward-only view of an HTTP response body.
+   * <p>
+   * The class declares {@link Seekable} and {@link PositionedReadable}
+   * because {@link FSDataInputStream} only accepts streams of both types;
+   * every method of the two interfaces throws
+   * {@link UnsupportedOperationException}. Plain sequential reads are
+   * delegated to the wrapped stream by {@link FilterInputStream}.
+   */
+  private static class HttpDataInputStream extends FilterInputStream
+      implements Seekable, PositionedReadable {
+
+    /**
+     * @param in response body to read from
+     */
+    HttpDataInputStream(InputStream in) {
+      super(in);
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw unsupported("positioned read");
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw unsupported("positioned readFully");
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public void readFully(long position, byte[] buffer) throws IOException {
+      throw unsupported("positioned readFully");
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public void seek(long pos) throws IOException {
+      throw unsupported("seek");
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public long getPos() throws IOException {
+      throw unsupported("getPos");
+    }
+
+    /** Not supported; always throws UnsupportedOperationException. */
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      throw unsupported("seekToNewSource");
+    }
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpFileSystem.java
new file mode 100644
index 00000000000..a4d1505012e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpFileSystem.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.http;
+
+/**
+ * A Filesystem that reads from HTTP endpoint.
+ * <p>
+ * All behaviour is inherited from {@link AbstractHttpFileSystem}; this
+ * class only supplies the {@code http} scheme name.
+ */
+public class HttpFileSystem extends AbstractHttpFileSystem {
+  /** Scheme name returned by {@link #getScheme()}. */
+  private static final String SCHEME = "http";
+
+  /**
+   * @return the constant scheme name {@code "http"}
+   */
+  @Override
+  public String getScheme() {
+    return SCHEME;
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpsFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpsFileSystem.java
new file mode 100644
index 00000000000..88e0968453d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/HttpsFileSystem.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.http;
+
+/**
+ * A Filesystem that reads from HTTPS endpoint.
+ * <p>
+ * All behaviour is inherited from {@link AbstractHttpFileSystem}; this
+ * class only supplies the {@code https} scheme name.
+ */
+public class HttpsFileSystem extends AbstractHttpFileSystem {
+  /** Scheme name returned by {@link #getScheme()}. */
+  private static final String SCHEME = "https";
+
+  /**
+   * @return the constant scheme name {@code "https"}
+   */
+  @Override
+  public String getScheme() {
+    return SCHEME;
+  }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/package-info.java
new file mode 100644
index 00000000000..a5b5206ba83
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/http/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Filesystem implementations that allow Hadoop to read directly from
+ * HTTP / HTTPS endpoints.
+ */ +package org.apache.hadoop.fs.http; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem index 17ffa7fe720..cbf2d6d0935 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem +++ b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -17,3 +17,5 @@ org.apache.hadoop.fs.LocalFileSystem org.apache.hadoop.fs.viewfs.ViewFileSystem org.apache.hadoop.fs.ftp.FTPFileSystem org.apache.hadoop.fs.HarFileSystem +org.apache.hadoop.fs.http.HttpFileSystem +org.apache.hadoop.fs.http.HttpsFileSystem diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java new file mode 100644 index 00000000000..0902c04c79b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/http/TestHttpFileSystem.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.http;
+
+import okhttp3.mockwebserver.MockResponse;
+import okhttp3.mockwebserver.MockWebServer;
+import okhttp3.mockwebserver.RecordedRequest;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Exercises {@link HttpFileSystem} against a local {@link MockWebServer}.
+ */
+public class TestHttpFileSystem {
+  /**
+   * Reads {@code /foo} through the file system and checks both the bytes
+   * returned and the request path the server recorded.
+   */
+  @Test
+  public void testHttpFileSystem() throws IOException, URISyntaxException,
+      InterruptedException {
+    final String data = "foo";
+    Configuration conf = new Configuration(false);
+    conf.set("fs.http.impl", HttpFileSystem.class.getCanonicalName());
+
+    try (MockWebServer server = new MockWebServer()) {
+      // The single canned response is the payload the read must return.
+      server.enqueue(new MockResponse().setBody(data));
+      server.start();
+
+      String base = String.format("http://%s:%d", server.getHostName(),
+          server.getPort());
+      URI uri = URI.create(base);
+      FileSystem fs = FileSystem.get(uri, conf);
+      Path target = new Path(new URL(uri.toURL(), "/foo").toURI());
+      byte[] contents = new byte[data.length()];
+
+      try (InputStream in = fs.open(target, 4096)) {
+        IOUtils.readFully(in, contents, 0, contents.length);
+        assertEquals(data, new String(contents, StandardCharsets.UTF_8));
+      }
+
+      // The server must have been asked for exactly the path that was opened.
+      RecordedRequest recorded = server.takeRequest();
+      assertEquals("/foo", recorded.getPath());
+    }
+  }
+}
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index fdb4fb1c75d..c9b6522e031 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -145,6 +145,12 @@ okhttp 2.4.0 + + com.squareup.okhttp3 + mockwebserver + 3.7.0 + test + jdiff jdiff