HADOOP-4687 Moving src directories on branch
git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/core@776174 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
5128a9a453
|
@ -0,0 +1,444 @@
|
|||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
|
||||
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||
<!-- wish to modify from this file into core-site.xml and change them -->
|
||||
<!-- there. If core-site.xml does not already exist, create it. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
<!--- global properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/tmp/hadoop-${user.name}</value>
|
||||
<description>A base for other temporary directories.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.native.lib</name>
|
||||
<value>true</value>
|
||||
<description>Should native hadoop libraries, if present, be used.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.filter.initializers</name>
|
||||
<value></value>
|
||||
<description>A comma separated list of class names. Each class in the list
|
||||
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
|
||||
Filter will be initialized. Then, the Filter will be applied to all user
|
||||
facing jsp and servlet web pages. The ordering of the list defines the
|
||||
ordering of the filters.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.authorization</name>
|
||||
<value>false</value>
|
||||
<description>Is service-level authorization enabled?</description>
|
||||
</property>
|
||||
|
||||
<!--- logging properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.size</name>
|
||||
<value>10000000</value>
|
||||
<description>The max size of each log file</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.count</name>
|
||||
<value>10</value>
|
||||
<description>The max number of log files</description>
|
||||
</property>
|
||||
|
||||
<!-- i/o properties -->
|
||||
<property>
|
||||
<name>io.file.buffer.size</name>
|
||||
<value>4096</value>
|
||||
<description>The size of buffer for use in sequence files.
|
||||
The size of this buffer should probably be a multiple of hardware
|
||||
page size (4096 on Intel x86), and it determines how much data is
|
||||
buffered during read and write operations.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.bytes.per.checksum</name>
|
||||
<value>512</value>
|
||||
<description>The number of bytes per checksum. Must not be larger than
|
||||
io.file.buffer.size.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.skip.checksum.errors</name>
|
||||
<value>false</value>
|
||||
<description>If true, when a checksum error is encountered while
|
||||
reading a sequence file, entries are skipped, instead of throwing an
|
||||
exception.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.compression.codecs</name>
|
||||
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
|
||||
<description>A list of the compression codec classes that can be used
|
||||
for compression/decompression.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.serializations</name>
|
||||
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
||||
<description>A list of serialization classes that can be used for
|
||||
obtaining serializers and deserializers.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.local.dir</name>
|
||||
<value>${hadoop.tmp.dir}/io/local</value>
|
||||
<description>The local directory where sequence file stores intermediate
|
||||
data files during merge. May be a comma-separated list of
|
||||
directories on different devices in order to spread disk i/o.
|
||||
Directories that do not exist are ignored.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- file system properties -->
|
||||
|
||||
<property>
|
||||
<name>fs.default.name</name>
|
||||
<value>file:///</value>
|
||||
<description>The name of the default file system. A URI whose
|
||||
scheme and authority determine the FileSystem implementation. The
|
||||
uri's scheme determines the config property (fs.SCHEME.impl) naming
|
||||
the FileSystem implementation class. The uri's authority is used to
|
||||
determine the host, port, etc. for a filesystem.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.trash.interval</name>
|
||||
<value>0</value>
|
||||
<description>Number of minutes between trash checkpoints.
|
||||
If zero, the trash feature is disabled.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.file.impl</name>
|
||||
<value>org.apache.hadoop.fs.LocalFileSystem</value>
|
||||
<description>The FileSystem for file: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hdfs.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
|
||||
<description>The FileSystem for hdfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
|
||||
<description>The FileSystem for s3: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3n.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
|
||||
<description>The FileSystem for s3n: (Native S3) uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.kfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
|
||||
<description>The FileSystem for kfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hsftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HsftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ftp.impl</name>
|
||||
<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
|
||||
<description>The FileSystem for ftp: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ramfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
|
||||
<description>The FileSystem for ramfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.har.impl</name>
|
||||
<value>org.apache.hadoop.fs.HarFileSystem</value>
|
||||
<description>The filesystem for Hadoop archives. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary images to merge.
|
||||
If this is a comma-delimited list of directories then the image is
|
||||
replicated in all of the directories for redundancy.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.edits.dir</name>
|
||||
<value>${fs.checkpoint.dir}</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary edits to merge.
|
||||
If this is a comma-delimited list of directories then the edits are
|
||||
replicated in all of the directories for redundancy.
|
||||
Default value is same as fs.checkpoint.dir
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.period</name>
|
||||
<value>3600</value>
|
||||
<description>The number of seconds between two periodic checkpoints.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.size</name>
|
||||
<value>67108864</value>
|
||||
<description>The size of the current edit log (in bytes) that triggers
|
||||
a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>fs.s3.block.size</name>
|
||||
<value>67108864</value>
|
||||
<description>Block size to use when writing files to S3.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.buffer.dir</name>
|
||||
<value>${hadoop.tmp.dir}/s3</value>
|
||||
<description>Determines where on the local filesystem the S3 filesystem
|
||||
should store files before sending them to S3
|
||||
(or after retrieving them from S3).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.maxRetries</name>
|
||||
<value>4</value>
|
||||
<description>The maximum number of retries for reading or writing files to S3,
|
||||
before we signal failure to the application.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.sleepTimeSeconds</name>
|
||||
<value>10</value>
|
||||
<description>The number of seconds to sleep between each S3 retry.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>local.cache.size</name>
|
||||
<value>10737418240</value>
|
||||
<description>The limit on the size of cache you want to keep, set by default
|
||||
to 10GB. This will act as a soft limit on the cache directory for out of band data.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.compress.blocksize</name>
|
||||
<value>1000000</value>
|
||||
<description>The minimum block size for compression in block compressed
|
||||
SequenceFiles.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.lazydecompress</name>
|
||||
<value>true</value>
|
||||
<description>Should values of block-compressed SequenceFiles be decompressed
|
||||
only when necessary.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.sorter.recordlimit</name>
|
||||
<value>1000000</value>
|
||||
<description>The limit on number of records to be kept in memory in a spill
|
||||
in SequenceFiles.Sorter
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.size</name>
|
||||
<value>1048576</value>
|
||||
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
|
||||
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
|
||||
Larger values minimize the number of filters, which slightly increases the performance,
|
||||
but may waste too much space if the total number of keys is usually much smaller
|
||||
than this number.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.error.rate</name>
|
||||
<value>0.005</value>
|
||||
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
|
||||
As this value decreases, the size of BloomFilter-s increases exponentially. This
|
||||
value is the probability of encountering false positives (default is 0.5%).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.util.hash.type</name>
|
||||
<value>murmur</value>
|
||||
<description>The default implementation of Hash. Currently this can take one of the
|
||||
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- ipc properties -->
|
||||
|
||||
<property>
|
||||
<name>ipc.client.idlethreshold</name>
|
||||
<value>4000</value>
|
||||
<description>Defines the threshold number of connections after which
|
||||
connections will be inspected for idleness.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.kill.max</name>
|
||||
<value>10</value>
|
||||
<description>Defines the maximum number of clients to disconnect in one go.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connection.maxidletime</name>
|
||||
<value>10000</value>
|
||||
<description>The maximum time in msec after which a client will bring down the
|
||||
connection to the server.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>Indicates the number of retries a client will make to establish
|
||||
a server connection.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.listen.queue.size</name>
|
||||
<value>128</value>
|
||||
<description>Indicates the length of the listen queue for servers accepting
|
||||
client connections.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the server. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the client. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- Web Interface Configuration -->
|
||||
|
||||
<property>
|
||||
<name>webinterface.private.actions</name>
|
||||
<value>false</value>
|
||||
<description> If set to true, the web interfaces of JT and NN may contain
|
||||
actions, such as kill job, delete file, etc., that should
|
||||
not be exposed to public. Enable this option if the interfaces
|
||||
are only reachable by those who have the right authorization.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Proxy Configuration -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.default</name>
|
||||
<value>org.apache.hadoop.net.StandardSocketFactory</value>
|
||||
<description> Default SocketFactory to use. This parameter is expected to be
|
||||
formatted as "package.FactoryClassName".
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
|
||||
<value></value>
|
||||
<description> SocketFactory to use to connect to a DFS. If null or empty, use
|
||||
hadoop.rpc.socket.class.default. This socket factory is also used by
|
||||
DFSClient to create sockets to DataNodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>hadoop.socks.server</name>
|
||||
<value></value>
|
||||
<description> Address (host:port) of the SOCKS server to be used by the
|
||||
SocksSocketFactory.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Rack Configuration -->
|
||||
|
||||
<property>
|
||||
<name>topology.node.switch.mapping.impl</name>
|
||||
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
|
||||
<description> The default implementation of the DNSToSwitchMapping. It
|
||||
invokes a script specified in topology.script.file.name to resolve
|
||||
node names. If the value for topology.script.file.name is not set, the
|
||||
default value of DEFAULT_RACK is returned for all node names.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.file.name</name>
|
||||
<value></value>
|
||||
<description> The script name that should be invoked to resolve DNS names to
|
||||
NetworkTopology names. Example: the script would take host.foo.bar as an
|
||||
argument, and return /rack1 as the output.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.number.args</name>
|
||||
<value>100</value>
|
||||
<description> The max number of args that the script configured with
|
||||
topology.script.file.name should be run with. Each arg is an
|
||||
IP address.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
</configuration>
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
 * Package-level annotation capturing the build metadata (version, builder,
 * build date, and source-control coordinates) recorded when Hadoop was
 * compiled. Retained at runtime so tools can query it via reflection.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.PACKAGE)
public @interface HadoopVersionAnnotation {

  /**
   * Returns the Hadoop version.
   * @return the version string, e.g. "0.6.3-dev"
   */
  String version();

  /**
   * Returns the username of the account that compiled Hadoop.
   */
  String user();

  /**
   * Returns the date on which Hadoop was compiled.
   * @return the date in unix 'date' format
   */
  String date();

  /**
   * Returns the URL of the subversion repository.
   */
  String url();

  /**
   * Returns the subversion revision.
   * @return the revision number as a string (e.g. "451451")
   */
  String revision();

  /**
   * Returns the branch from which this build was made.
   * @return the branch name, e.g. "trunk" or "branches/branch-0.20"
   */
  String branch();

  /**
   * Returns a checksum of the source files from which Hadoop was compiled.
   * @return a string that uniquely identifies the source
   */
  String srcChecksum();
}
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.conf;
|
||||
|
||||
/** Something that may be configured with a {@link Configuration}. */
|
||||
public interface Configurable {
|
||||
|
||||
/** Set the configuration to be used by this object. */
|
||||
void setConf(Configuration conf);
|
||||
|
||||
/** Return the configuration used by this object. */
|
||||
Configuration getConf();
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,46 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.conf;
|
||||
|
||||
/** Base class for things that may be configured with a {@link Configuration}. */
|
||||
public class Configured implements Configurable {
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
/** Construct a Configured. */
|
||||
public Configured() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
/** Construct a Configured. */
|
||||
public Configured(Configuration conf) {
|
||||
setConf(conf);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public void setConf(Configuration conf) {
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public Configuration getConf() {
|
||||
return conf;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
<html>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<body>
|
||||
Configuration of system parameters.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,879 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.filecache;
|
||||
|
||||
import org.apache.commons.logging.*;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import org.apache.hadoop.conf.*;
|
||||
import org.apache.hadoop.util.*;
|
||||
import org.apache.hadoop.fs.*;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
/**
|
||||
* Distribute application-specific large, read-only files efficiently.
|
||||
*
|
||||
* <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
|
||||
* framework to cache files (text, archives, jars etc.) needed by applications.
|
||||
* </p>
|
||||
*
|
||||
* <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
|
||||
* via the {@link org.apache.hadoop.mapred.JobConf}.
|
||||
* The <code>DistributedCache</code> assumes that the
|
||||
* files specified via hdfs:// urls are already present on the
|
||||
* {@link FileSystem} at the path specified by the url.</p>
|
||||
*
|
||||
* <p>The framework will copy the necessary files on to the slave node before
|
||||
* any tasks for the job are executed on that node. Its efficiency stems from
|
||||
* the fact that the files are only copied once per job and the ability to
|
||||
* cache archives which are un-archived on the slaves.</p>
|
||||
*
|
||||
* <p><code>DistributedCache</code> can be used to distribute simple, read-only
|
||||
* data/text files and/or more complex types such as archives, jars etc.
|
||||
* Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
|
||||
* Jars may be optionally added to the classpath of the tasks, a rudimentary
|
||||
* software distribution mechanism. Files have execution permissions.
|
||||
* Optionally users can also direct it to symlink the distributed cache file(s)
|
||||
* into the working directory of the task.</p>
|
||||
*
|
||||
* <p><code>DistributedCache</code> tracks modification timestamps of the cache
|
||||
* files. Clearly the cache files should not be modified by the application
|
||||
* or externally while the job is executing.</p>
|
||||
*
|
||||
* <p>Here is an illustrative example on how to use the
|
||||
* <code>DistributedCache</code>:</p>
|
||||
* <p><blockquote><pre>
|
||||
* // Setting up the cache for the application
|
||||
*
|
||||
* 1. Copy the requisite files to the <code>FileSystem</code>:
|
||||
*
|
||||
* $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
|
||||
* $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
|
||||
* $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
|
||||
* $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
|
||||
* $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
|
||||
* $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
|
||||
*
|
||||
* 2. Setup the application's <code>JobConf</code>:
|
||||
*
|
||||
* JobConf job = new JobConf();
|
||||
* DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
|
||||
* job);
|
||||
* DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
|
||||
* DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
|
||||
* DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
|
||||
* DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
|
||||
* DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
|
||||
*
|
||||
* 3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
|
||||
* or {@link org.apache.hadoop.mapred.Reducer}:
|
||||
*
|
||||
* public static class MapClass extends MapReduceBase
|
||||
* implements Mapper<K, V, K, V> {
|
||||
*
|
||||
* private Path[] localArchives;
|
||||
* private Path[] localFiles;
|
||||
*
|
||||
* public void configure(JobConf job) {
|
||||
* // Get the cached archives/files
|
||||
* localArchives = DistributedCache.getLocalCacheArchives(job);
|
||||
* localFiles = DistributedCache.getLocalCacheFiles(job);
|
||||
* }
|
||||
*
|
||||
* public void map(K key, V value,
|
||||
* OutputCollector<K, V> output, Reporter reporter)
|
||||
* throws IOException {
|
||||
* // Use data from the cached archives/files here
|
||||
* // ...
|
||||
* // ...
|
||||
* output.collect(k, v);
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* </pre></blockquote></p>
|
||||
*
|
||||
* @see org.apache.hadoop.mapred.JobConf
|
||||
* @see org.apache.hadoop.mapred.JobClient
|
||||
*/
|
||||
public class DistributedCache {
|
||||
// cacheID to cacheStatus mapping
|
||||
private static TreeMap<String, CacheStatus> cachedArchives = new TreeMap<String, CacheStatus>();
|
||||
|
||||
private static TreeMap<Path, Long> baseDirSize = new TreeMap<Path, Long>();
|
||||
|
||||
// default total cache size
|
||||
private static final long DEFAULT_CACHE_SIZE = 10737418240L;
|
||||
|
||||
private static final Log LOG =
|
||||
LogFactory.getLog(DistributedCache.class);
|
||||
|
||||
/**
|
||||
* Get the locally cached file or archive; it could either be
|
||||
* previously cached (and valid) or copy it from the {@link FileSystem} now.
|
||||
*
|
||||
* @param cache the cache to be localized, this should be specified as
|
||||
* new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema
|
||||
* or hostname:port is provided the file is assumed to be in the filesystem
|
||||
* being used in the Configuration
|
||||
* @param conf The Confguration file which contains the filesystem
|
||||
* @param baseDir The base cache Dir where you wnat to localize the files/archives
|
||||
* @param fileStatus The file status on the dfs.
|
||||
* @param isArchive if the cache is an archive or a file. In case it is an
|
||||
* archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will
|
||||
* be unzipped/unjarred/untarred automatically
|
||||
* and the directory where the archive is unzipped/unjarred/untarred is
|
||||
* returned as the Path.
|
||||
* In case of a file, the path to the file is returned
|
||||
* @param confFileStamp this is the hdfs file modification timestamp to verify that the
|
||||
* file to be cached hasn't changed since the job started
|
||||
* @param currentWorkDir this is the directory where you would want to create symlinks
|
||||
* for the locally cached files/archives
|
||||
* @return the path to directory where the archives are unjarred in case of archives,
|
||||
* the path to the file where the file is copied locally
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Path getLocalCache(URI cache, Configuration conf,
|
||||
Path baseDir, FileStatus fileStatus,
|
||||
boolean isArchive, long confFileStamp,
|
||||
Path currentWorkDir)
|
||||
throws IOException {
|
||||
return getLocalCache(cache, conf, baseDir, fileStatus, isArchive,
|
||||
confFileStamp, currentWorkDir, true);
|
||||
}
|
||||
/**
|
||||
* Get the locally cached file or archive; it could either be
|
||||
* previously cached (and valid) or copy it from the {@link FileSystem} now.
|
||||
*
|
||||
* @param cache the cache to be localized, this should be specified as
|
||||
* new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema
|
||||
* or hostname:port is provided the file is assumed to be in the filesystem
|
||||
* being used in the Configuration
|
||||
* @param conf The Configuration file which contains the filesystem
|
||||
* @param baseDir The base cache Dir where you want to localize the files/archives
|
||||
* @param fileStatus The file status on the dfs.
|
||||
* @param isArchive if the cache is an archive or a file. In case it is an
|
||||
* archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will
|
||||
* be unzipped/unjarred/untarred automatically
|
||||
* and the directory where the archive is unzipped/unjarred/untarred is
|
||||
* returned as the Path.
|
||||
* In case of a file, the path to the file is returned
|
||||
* @param confFileStamp this is the hdfs file modification timestamp to verify that the
|
||||
* file to be cached hasn't changed since the job started
|
||||
* @param currentWorkDir this is the directory where you would want to create symlinks
|
||||
* for the locally cached files/archives
|
||||
* @param honorSymLinkConf if this is false, then the symlinks are not
|
||||
* created even if conf says so (this is required for an optimization in task
|
||||
* launches
|
||||
* @return the path to directory where the archives are unjarred in case of archives,
|
||||
* the path to the file where the file is copied locally
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Path getLocalCache(URI cache, Configuration conf,
|
||||
Path baseDir, FileStatus fileStatus,
|
||||
boolean isArchive, long confFileStamp,
|
||||
Path currentWorkDir, boolean honorSymLinkConf)
|
||||
throws IOException {
|
||||
String cacheId = makeRelative(cache, conf);
|
||||
CacheStatus lcacheStatus;
|
||||
Path localizedPath;
|
||||
synchronized (cachedArchives) {
|
||||
lcacheStatus = cachedArchives.get(cacheId);
|
||||
if (lcacheStatus == null) {
|
||||
// was never localized
|
||||
lcacheStatus = new CacheStatus(baseDir, new Path(baseDir, new Path(cacheId)));
|
||||
cachedArchives.put(cacheId, lcacheStatus);
|
||||
}
|
||||
|
||||
synchronized (lcacheStatus) {
|
||||
localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus,
|
||||
fileStatus, isArchive, currentWorkDir, honorSymLinkConf);
|
||||
lcacheStatus.refcount++;
|
||||
}
|
||||
}
|
||||
|
||||
// try deleting stuff if you can
|
||||
long size = 0;
|
||||
synchronized (baseDirSize) {
|
||||
Long get = baseDirSize.get(baseDir);
|
||||
if ( get != null ) {
|
||||
size = get.longValue();
|
||||
}
|
||||
}
|
||||
// setting the cache size to a default of 10GB
|
||||
long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
|
||||
if (allowedSize < size) {
|
||||
// try some cache deletions
|
||||
deleteCache(conf);
|
||||
}
|
||||
return localizedPath;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the locally cached file or archive; it could either be
|
||||
* previously cached (and valid) or copy it from the {@link FileSystem} now.
|
||||
*
|
||||
* @param cache the cache to be localized, this should be specified as
|
||||
* new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema
|
||||
* or hostname:port is provided the file is assumed to be in the filesystem
|
||||
* being used in the Configuration
|
||||
* @param conf The Confguration file which contains the filesystem
|
||||
* @param baseDir The base cache Dir where you wnat to localize the files/archives
|
||||
* @param isArchive if the cache is an archive or a file. In case it is an
|
||||
* archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will
|
||||
* be unzipped/unjarred/untarred automatically
|
||||
* and the directory where the archive is unzipped/unjarred/untarred
|
||||
* is returned as the Path.
|
||||
* In case of a file, the path to the file is returned
|
||||
* @param confFileStamp this is the hdfs file modification timestamp to verify that the
|
||||
* file to be cached hasn't changed since the job started
|
||||
* @param currentWorkDir this is the directory where you would want to create symlinks
|
||||
* for the locally cached files/archives
|
||||
* @return the path to directory where the archives are unjarred in case of archives,
|
||||
* the path to the file where the file is copied locally
|
||||
* @throws IOException
|
||||
|
||||
*/
|
||||
public static Path getLocalCache(URI cache, Configuration conf,
|
||||
Path baseDir, boolean isArchive,
|
||||
long confFileStamp, Path currentWorkDir)
|
||||
throws IOException {
|
||||
return getLocalCache(cache, conf,
|
||||
baseDir, null, isArchive,
|
||||
confFileStamp, currentWorkDir);
|
||||
}
|
||||
|
||||
  /**
   * This is the opposite of getLocalCache. When you are done with
   * using the cache, you need to release it so that its refcount can
   * reach zero and the localized files become eligible for deletion.
   * @param cache The cache URI to be released
   * @param conf configuration which contains the filesystem the cache
   * is contained in.
   * @throws IOException
   */
  public static void releaseCache(URI cache, Configuration conf)
    throws IOException {
    String cacheId = makeRelative(cache, conf);
    synchronized (cachedArchives) {
      CacheStatus lcacheStatus = cachedArchives.get(cacheId);
      // Releasing a cache that was never localized is a silent no-op.
      if (lcacheStatus == null)
        return;
      synchronized (lcacheStatus) {
        // NOTE(review): no lower bound is enforced here, so an unbalanced
        // release can drive refcount negative — confirm callers pair this
        // with getLocalCache.
        lcacheStatus.refcount--;
      }
    }
  }
|
||||
|
||||
// To delete the caches which have a refcount of zero
|
||||
|
||||
private static void deleteCache(Configuration conf) throws IOException {
|
||||
// try deleting cache Status with refcount of zero
|
||||
synchronized (cachedArchives) {
|
||||
for (Iterator it = cachedArchives.keySet().iterator(); it.hasNext();) {
|
||||
String cacheId = (String) it.next();
|
||||
CacheStatus lcacheStatus = cachedArchives.get(cacheId);
|
||||
synchronized (lcacheStatus) {
|
||||
if (lcacheStatus.refcount == 0) {
|
||||
// delete this cache entry
|
||||
FileSystem.getLocal(conf).delete(lcacheStatus.localLoadPath, true);
|
||||
synchronized (baseDirSize) {
|
||||
Long dirSize = baseDirSize.get(lcacheStatus.baseDir);
|
||||
if ( dirSize != null ) {
|
||||
dirSize -= lcacheStatus.size;
|
||||
baseDirSize.put(lcacheStatus.baseDir, dirSize);
|
||||
}
|
||||
}
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the relative path of the dir this cache will be localized in
|
||||
* relative path that this cache will be localized in. For
|
||||
* hdfs://hostname:port/absolute_path -- the relative path is
|
||||
* hostname/absolute path -- if it is just /absolute_path -- then the
|
||||
* relative path is hostname of DFS this mapred cluster is running
|
||||
* on/absolute_path
|
||||
*/
|
||||
public static String makeRelative(URI cache, Configuration conf)
|
||||
throws IOException {
|
||||
String host = cache.getHost();
|
||||
if (host == null) {
|
||||
host = cache.getScheme();
|
||||
}
|
||||
if (host == null) {
|
||||
URI defaultUri = FileSystem.get(conf).getUri();
|
||||
host = defaultUri.getHost();
|
||||
if (host == null) {
|
||||
host = defaultUri.getScheme();
|
||||
}
|
||||
}
|
||||
String path = host + cache.getPath();
|
||||
path = path.replace(":/","/"); // remove windows device colon
|
||||
return path;
|
||||
}
|
||||
|
||||
private static Path cacheFilePath(Path p) {
|
||||
return new Path(p, p.getName());
|
||||
}
|
||||
|
||||
  // The method which actually copies the caches locally, unjars/unzips/untars
  // them, and chmods the files. Caller must hold the lock on cacheStatus.
  //
  // Returns the unpack directory for archives, or the localized file path
  // for plain files. If the cached copy is present and fresh it is reused;
  // otherwise the old copy is deleted and the cache is re-fetched from the
  // source filesystem.
  private static Path localizeCache(Configuration conf,
                                    URI cache, long confFileStamp,
                                    CacheStatus cacheStatus,
                                    FileStatus fileStatus,
                                    boolean isArchive,
                                    Path currentWorkDir, boolean honorSymLinkConf)
    throws IOException {
    // Symlinks are only created when both the caller and the conf ask for
    // them AND the URI carries a #fragment naming the link.
    boolean doSymlink = honorSymLinkConf && getSymlink(conf);
    if(cache.getFragment() == null) {
      doSymlink = false;
    }
    FileSystem fs = getFileSystem(cache, conf);
    String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment();
    File flink = new File(link);
    if (ifExistsAndFresh(conf, fs, cache, confFileStamp,
                         cacheStatus, fileStatus)) {
      // Fast path: the cached copy is valid, just (re)create the symlink.
      if (isArchive) {
        if (doSymlink){
          if (!flink.exists())
            FileUtil.symLink(cacheStatus.localLoadPath.toString(),
                             link);
        }
        return cacheStatus.localLoadPath;
      }
      else {
        if (doSymlink){
          if (!flink.exists())
            FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(),
                             link);
        }
        return cacheFilePath(cacheStatus.localLoadPath);
      }
    } else {
      // Stale (or never localized): remove the old archive and re-fetch.
      // If the old archive cannot be removed since it is being used by
      // another job, fail rather than pull the files out from under it.
      if (cacheStatus.refcount > 1 && (cacheStatus.currentStatus == true))
        throw new IOException("Cache " + cacheStatus.localLoadPath.toString()
                              + " is in use and cannot be refreshed");

      FileSystem localFs = FileSystem.getLocal(conf);
      localFs.delete(cacheStatus.localLoadPath, true);
      // Deduct the old copy's size from the per-baseDir accounting.
      synchronized (baseDirSize) {
        Long dirSize = baseDirSize.get(cacheStatus.baseDir);
        if ( dirSize != null ) {
          dirSize -= cacheStatus.size;
          baseDirSize.put(cacheStatus.baseDir, dirSize);
        }
      }
      // parchive = localLoadPath/<name>: the raw downloaded file lives inside
      // the per-cache directory so archives can unpack next to it.
      Path parchive = new Path(cacheStatus.localLoadPath,
                               new Path(cacheStatus.localLoadPath.getName()));

      if (!localFs.mkdirs(cacheStatus.localLoadPath)) {
        throw new IOException("Mkdirs failed to create directory " +
                              cacheStatus.localLoadPath.toString());
      }

      String cacheId = cache.getPath();
      fs.copyToLocalFile(new Path(cacheId), parchive);
      if (isArchive) {
        // Pick the unpack strategy from the (lower-cased) file extension.
        String tmpArchive = parchive.toString().toLowerCase();
        File srcFile = new File(parchive.toString());
        File destDir = new File(parchive.getParent().toString());
        if (tmpArchive.endsWith(".jar")) {
          RunJar.unJar(srcFile, destDir);
        } else if (tmpArchive.endsWith(".zip")) {
          FileUtil.unZip(srcFile, destDir);
        } else if (isTarFile(tmpArchive)) {
          FileUtil.unTar(srcFile, destDir);
        }
        // else: unknown extension — do nothing and leave the copied file
        // in the directory as it is.
      }

      // Record the on-disk size of the freshly localized cache.
      long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString()));
      cacheStatus.size = cacheSize;
      synchronized (baseDirSize) {
        Long dirSize = baseDirSize.get(cacheStatus.baseDir);
        if( dirSize == null ) {
          dirSize = Long.valueOf(cacheSize);
        } else {
          dirSize += cacheSize;
        }
        baseDirSize.put(cacheStatus.baseDir, dirSize);
      }

      // do chmod here
      try {
        // Setting recursive permission to grant everyone read and execute
        FileUtil.chmod(cacheStatus.baseDir.toString(), "ugo+rx",true);
      } catch(InterruptedException e) {
        // Best-effort: log and continue; the cache is still usable.
        LOG.warn("Exception in chmod" + e.toString());
      }

      // Update cacheStatus to reflect the newly cached file.
      cacheStatus.currentStatus = true;
      cacheStatus.mtime = getTimestamp(conf, cache);
    }

    // Common exit: (re)create the symlink if requested and return the
    // appropriate localized path.
    if (isArchive){
      if (doSymlink){
        if (!flink.exists())
          FileUtil.symLink(cacheStatus.localLoadPath.toString(),
                           link);
      }
      return cacheStatus.localLoadPath;
    }
    else {
      if (doSymlink){
        if (!flink.exists())
          FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(),
                           link);
      }
      return cacheFilePath(cacheStatus.localLoadPath);
    }
  }
|
||||
|
||||
private static boolean isTarFile(String filename) {
|
||||
return (filename.endsWith(".tgz") || filename.endsWith(".tar.gz") ||
|
||||
filename.endsWith(".tar"));
|
||||
}
|
||||
|
||||
  // Checks if the cache has already been localized and is fresh, i.e. its
  // source timestamp matches both the job's recorded timestamp and the
  // timestamp seen when the cache was localized.
  // NOTE(review): the 'fs' parameter is not used in this body — presumably
  // kept for interface symmetry; confirm before removing.
  private static boolean ifExistsAndFresh(Configuration conf, FileSystem fs,
                                          URI cache, long confFileStamp,
                                          CacheStatus lcacheStatus,
                                          FileStatus fileStatus)
    throws IOException {
    // Never localized -> definitely not fresh.
    if (lcacheStatus.currentStatus == false) {
      return false;
    } else {
      long dfsFileStamp;
      // Use the caller-provided status when available to avoid an extra
      // round trip to the filesystem.
      if (fileStatus != null) {
        dfsFileStamp = fileStatus.getModificationTime();
      } else {
        dfsFileStamp = getTimestamp(conf, cache);
      }

      // Ensure that the file on hdfs hasn't been modified since the job
      // started; a mismatch is a hard error, not merely "stale".
      if (dfsFileStamp != confFileStamp) {
        LOG.fatal("File: " + cache + " has changed on HDFS since job started");
        throw new IOException("File: " + cache +
                              " has changed on HDFS since job started");
      }

      // Localized copy predates the current source file: needs refreshing.
      if (dfsFileStamp != lcacheStatus.mtime) {
        return false;
      }
    }

    return true;
  }
|
||||
|
||||
/**
|
||||
* Returns mtime of a given cache file on hdfs.
|
||||
* @param conf configuration
|
||||
* @param cache cache file
|
||||
* @return mtime of a given cache file on hdfs
|
||||
* @throws IOException
|
||||
*/
|
||||
public static long getTimestamp(Configuration conf, URI cache)
|
||||
throws IOException {
|
||||
FileSystem fileSystem = FileSystem.get(cache, conf);
|
||||
Path filePath = new Path(cache.getPath());
|
||||
|
||||
return fileSystem.getFileStatus(filePath).getModificationTime();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method create symlinks for all files in a given dir in another directory
|
||||
* @param conf the configuration
|
||||
* @param jobCacheDir the target directory for creating symlinks
|
||||
* @param workDir the directory in which the symlinks are created
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void createAllSymlink(Configuration conf, File jobCacheDir, File workDir)
|
||||
throws IOException{
|
||||
if ((jobCacheDir == null || !jobCacheDir.isDirectory()) ||
|
||||
workDir == null || (!workDir.isDirectory())) {
|
||||
return;
|
||||
}
|
||||
boolean createSymlink = getSymlink(conf);
|
||||
if (createSymlink){
|
||||
File[] list = jobCacheDir.listFiles();
|
||||
for (int i=0; i < list.length; i++){
|
||||
FileUtil.symLink(list[i].getAbsolutePath(),
|
||||
new File(workDir, list[i].getName()).toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static FileSystem getFileSystem(URI cache, Configuration conf)
|
||||
throws IOException {
|
||||
if ("hdfs".equals(cache.getScheme()))
|
||||
return FileSystem.get(cache, conf);
|
||||
else
|
||||
return FileSystem.get(conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the configuration with the given set of archives
|
||||
* @param archives The list of archives that need to be localized
|
||||
* @param conf Configuration which will be changed
|
||||
*/
|
||||
public static void setCacheArchives(URI[] archives, Configuration conf) {
|
||||
String sarchives = StringUtils.uriToString(archives);
|
||||
conf.set("mapred.cache.archives", sarchives);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the configuration with the given set of files
|
||||
* @param files The list of files that need to be localized
|
||||
* @param conf Configuration which will be changed
|
||||
*/
|
||||
public static void setCacheFiles(URI[] files, Configuration conf) {
|
||||
String sfiles = StringUtils.uriToString(files);
|
||||
conf.set("mapred.cache.files", sfiles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cache archives set in the Configuration
|
||||
* @param conf The configuration which contains the archives
|
||||
* @return A URI array of the caches set in the Configuration
|
||||
* @throws IOException
|
||||
*/
|
||||
public static URI[] getCacheArchives(Configuration conf) throws IOException {
|
||||
return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cache files set in the Configuration
|
||||
* @param conf The configuration which contains the files
|
||||
* @return A URI array of the files set in the Configuration
|
||||
* @throws IOException
|
||||
*/
|
||||
|
||||
public static URI[] getCacheFiles(Configuration conf) throws IOException {
|
||||
return StringUtils.stringToURI(conf.getStrings("mapred.cache.files"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the path array of the localized caches
|
||||
* @param conf Configuration that contains the localized archives
|
||||
* @return A path array of localized caches
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Path[] getLocalCacheArchives(Configuration conf)
|
||||
throws IOException {
|
||||
return StringUtils.stringToPath(conf
|
||||
.getStrings("mapred.cache.localArchives"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the path array of the localized files
|
||||
* @param conf Configuration that contains the localized files
|
||||
* @return A path array of localized files
|
||||
* @throws IOException
|
||||
*/
|
||||
public static Path[] getLocalCacheFiles(Configuration conf)
|
||||
throws IOException {
|
||||
return StringUtils.stringToPath(conf.getStrings("mapred.cache.localFiles"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the timestamps of the archives
|
||||
* @param conf The configuration which stored the timestamps
|
||||
* @return a string array of timestamps
|
||||
* @throws IOException
|
||||
*/
|
||||
public static String[] getArchiveTimestamps(Configuration conf) {
|
||||
return conf.getStrings("mapred.cache.archives.timestamps");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the timestamps of the files
|
||||
* @param conf The configuration which stored the timestamps
|
||||
* @return a string array of timestamps
|
||||
* @throws IOException
|
||||
*/
|
||||
public static String[] getFileTimestamps(Configuration conf) {
|
||||
return conf.getStrings("mapred.cache.files.timestamps");
|
||||
}
|
||||
|
||||
/**
|
||||
* This is to check the timestamp of the archives to be localized
|
||||
* @param conf Configuration which stores the timestamp's
|
||||
* @param timestamps comma separated list of timestamps of archives.
|
||||
* The order should be the same as the order in which the archives are added.
|
||||
*/
|
||||
public static void setArchiveTimestamps(Configuration conf, String timestamps) {
|
||||
conf.set("mapred.cache.archives.timestamps", timestamps);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is to check the timestamp of the files to be localized
|
||||
* @param conf Configuration which stores the timestamp's
|
||||
* @param timestamps comma separated list of timestamps of files.
|
||||
* The order should be the same as the order in which the files are added.
|
||||
*/
|
||||
public static void setFileTimestamps(Configuration conf, String timestamps) {
|
||||
conf.set("mapred.cache.files.timestamps", timestamps);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the conf to contain the location for localized archives
|
||||
* @param conf The conf to modify to contain the localized caches
|
||||
* @param str a comma separated list of local archives
|
||||
*/
|
||||
public static void setLocalArchives(Configuration conf, String str) {
|
||||
conf.set("mapred.cache.localArchives", str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the conf to contain the location for localized files
|
||||
* @param conf The conf to modify to contain the localized caches
|
||||
* @param str a comma separated list of local files
|
||||
*/
|
||||
public static void setLocalFiles(Configuration conf, String str) {
|
||||
conf.set("mapred.cache.localFiles", str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a archives to be localized to the conf
|
||||
* @param uri The uri of the cache to be localized
|
||||
* @param conf Configuration to add the cache to
|
||||
*/
|
||||
public static void addCacheArchive(URI uri, Configuration conf) {
|
||||
String archives = conf.get("mapred.cache.archives");
|
||||
conf.set("mapred.cache.archives", archives == null ? uri.toString()
|
||||
: archives + "," + uri.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a file to be localized to the conf
|
||||
* @param uri The uri of the cache to be localized
|
||||
* @param conf Configuration to add the cache to
|
||||
*/
|
||||
public static void addCacheFile(URI uri, Configuration conf) {
|
||||
String files = conf.get("mapred.cache.files");
|
||||
conf.set("mapred.cache.files", files == null ? uri.toString() : files + ","
|
||||
+ uri.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an file path to the current set of classpath entries It adds the file
|
||||
* to cache as well.
|
||||
*
|
||||
* @param file Path of the file to be added
|
||||
* @param conf Configuration that contains the classpath setting
|
||||
*/
|
||||
public static void addFileToClassPath(Path file, Configuration conf)
|
||||
throws IOException {
|
||||
String classpath = conf.get("mapred.job.classpath.files");
|
||||
conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
|
||||
: classpath + System.getProperty("path.separator") + file.toString());
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
URI uri = fs.makeQualified(file).toUri();
|
||||
|
||||
addCacheFile(uri, conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the file entries in classpath as an array of Path
|
||||
*
|
||||
* @param conf Configuration that contains the classpath setting
|
||||
*/
|
||||
public static Path[] getFileClassPaths(Configuration conf) {
|
||||
String classpath = conf.get("mapred.job.classpath.files");
|
||||
if (classpath == null)
|
||||
return null;
|
||||
ArrayList list = Collections.list(new StringTokenizer(classpath, System
|
||||
.getProperty("path.separator")));
|
||||
Path[] paths = new Path[list.size()];
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
paths[i] = new Path((String) list.get(i));
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an archive path to the current set of classpath entries. It adds the
|
||||
* archive to cache as well.
|
||||
*
|
||||
* @param archive Path of the archive to be added
|
||||
* @param conf Configuration that contains the classpath setting
|
||||
*/
|
||||
public static void addArchiveToClassPath(Path archive, Configuration conf)
|
||||
throws IOException {
|
||||
String classpath = conf.get("mapred.job.classpath.archives");
|
||||
conf.set("mapred.job.classpath.archives", classpath == null ? archive
|
||||
.toString() : classpath + System.getProperty("path.separator")
|
||||
+ archive.toString());
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
URI uri = fs.makeQualified(archive).toUri();
|
||||
|
||||
addCacheArchive(uri, conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the archive entries in classpath as an array of Path
|
||||
*
|
||||
* @param conf Configuration that contains the classpath setting
|
||||
*/
|
||||
public static Path[] getArchiveClassPaths(Configuration conf) {
|
||||
String classpath = conf.get("mapred.job.classpath.archives");
|
||||
if (classpath == null)
|
||||
return null;
|
||||
ArrayList list = Collections.list(new StringTokenizer(classpath, System
|
||||
.getProperty("path.separator")));
|
||||
Path[] paths = new Path[list.size()];
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
paths[i] = new Path((String) list.get(i));
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method allows you to create symlinks in the current working directory
|
||||
* of the task to all the cache files/archives
|
||||
* @param conf the jobconf
|
||||
*/
|
||||
public static void createSymlink(Configuration conf){
|
||||
conf.set("mapred.create.symlink", "yes");
|
||||
}
|
||||
|
||||
/**
|
||||
* This method checks to see if symlinks are to be create for the
|
||||
* localized cache files in the current working directory
|
||||
* @param conf the jobconf
|
||||
* @return true if symlinks are to be created- else return false
|
||||
*/
|
||||
public static boolean getSymlink(Configuration conf){
|
||||
String result = conf.get("mapred.create.symlink");
|
||||
if ("yes".equals(result)){
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method checks if there is a conflict in the fragment names
|
||||
* of the uris. Also makes sure that each uri has a fragment. It
|
||||
* is only to be called if you want to create symlinks for
|
||||
* the various archives and files.
|
||||
* @param uriFiles The uri array of urifiles
|
||||
* @param uriArchives the uri array of uri archives
|
||||
*/
|
||||
public static boolean checkURIs(URI[] uriFiles, URI[] uriArchives){
|
||||
if ((uriFiles == null) && (uriArchives == null)){
|
||||
return true;
|
||||
}
|
||||
if (uriFiles != null){
|
||||
for (int i = 0; i < uriFiles.length; i++){
|
||||
String frag1 = uriFiles[i].getFragment();
|
||||
if (frag1 == null)
|
||||
return false;
|
||||
for (int j=i+1; j < uriFiles.length; j++){
|
||||
String frag2 = uriFiles[j].getFragment();
|
||||
if (frag2 == null)
|
||||
return false;
|
||||
if (frag1.equalsIgnoreCase(frag2))
|
||||
return false;
|
||||
}
|
||||
if (uriArchives != null){
|
||||
for (int j = 0; j < uriArchives.length; j++){
|
||||
String frag2 = uriArchives[j].getFragment();
|
||||
if (frag2 == null){
|
||||
return false;
|
||||
}
|
||||
if (frag1.equalsIgnoreCase(frag2))
|
||||
return false;
|
||||
for (int k=j+1; k < uriArchives.length; k++){
|
||||
String frag3 = uriArchives[k].getFragment();
|
||||
if (frag3 == null)
|
||||
return false;
|
||||
if (frag2.equalsIgnoreCase(frag3))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
  /**
   * Per-cache bookkeeping record: localization state, on-disk location,
   * size accounting, and a reference count of concurrent users. Instances
   * are the lock objects guarding localization of their cache entry.
   */
  private static class CacheStatus {
    // false: not loaded yet; true: loaded
    boolean currentStatus;

    // the local load path of this cache
    Path localLoadPath;

    // the base dir where the cache lies
    Path baseDir;

    // the size of this cache (bytes on local disk, set after localization)
    long size;

    // number of instances using this cache
    int refcount;

    // the cache-file modification time (source mtime at localization; -1
    // until first localized)
    long mtime;

    public CacheStatus(Path baseDir, Path localLoadPath) {
      super();
      this.currentStatus = false;
      this.localLoadPath = localLoadPath;
      this.refcount = 0;
      this.mtime = -1;
      this.baseDir = baseDir;
      this.size = 0;
    }
  }
|
||||
|
||||
/**
|
||||
* Clear the entire contents of the cache and delete the backing files. This
|
||||
* should only be used when the server is reinitializing, because the users
|
||||
* are going to lose their files.
|
||||
*/
|
||||
public static void purgeCache(Configuration conf) throws IOException {
|
||||
synchronized (cachedArchives) {
|
||||
FileSystem localFs = FileSystem.getLocal(conf);
|
||||
for (Map.Entry<String,CacheStatus> f: cachedArchives.entrySet()) {
|
||||
try {
|
||||
localFs.delete(f.getValue().localLoadPath, true);
|
||||
} catch (IOException ie) {
|
||||
LOG.debug("Error cleaning up cache", ie);
|
||||
}
|
||||
}
|
||||
cachedArchives.clear();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,241 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.apache.hadoop.io.*;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/*
|
||||
* A BlockLocation lists hosts, offset and length
|
||||
* of block.
|
||||
*
|
||||
*/
|
||||
public class BlockLocation implements Writable {
|
||||
|
||||
static { // register a ctor
|
||||
WritableFactories.setFactory
|
||||
(BlockLocation.class,
|
||||
new WritableFactory() {
|
||||
public Writable newInstance() { return new BlockLocation(); }
|
||||
});
|
||||
}
|
||||
|
||||
private String[] hosts; //hostnames of datanodes
|
||||
private String[] names; //hostname:portNumber of datanodes
|
||||
private String[] topologyPaths; // full path name in network topology
|
||||
private long offset; //offset of the of the block in the file
|
||||
private long length;
|
||||
|
||||
/**
|
||||
* Default Constructor
|
||||
*/
|
||||
public BlockLocation() {
|
||||
this(new String[0], new String[0], 0L, 0L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor with host, name, offset and length
|
||||
*/
|
||||
public BlockLocation(String[] names, String[] hosts, long offset,
|
||||
long length) {
|
||||
if (names == null) {
|
||||
this.names = new String[0];
|
||||
} else {
|
||||
this.names = names;
|
||||
}
|
||||
if (hosts == null) {
|
||||
this.hosts = new String[0];
|
||||
} else {
|
||||
this.hosts = hosts;
|
||||
}
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
this.topologyPaths = new String[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor with host, name, network topology, offset and length
|
||||
*/
|
||||
public BlockLocation(String[] names, String[] hosts, String[] topologyPaths,
|
||||
long offset, long length) {
|
||||
this(names, hosts, offset, length);
|
||||
if (topologyPaths == null) {
|
||||
this.topologyPaths = new String[0];
|
||||
} else {
|
||||
this.topologyPaths = topologyPaths;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of hosts (hostname) hosting this block
|
||||
*/
|
||||
public String[] getHosts() throws IOException {
|
||||
if ((hosts == null) || (hosts.length == 0)) {
|
||||
return new String[0];
|
||||
} else {
|
||||
return hosts;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of names (hostname:port) hosting this block
|
||||
*/
|
||||
public String[] getNames() throws IOException {
|
||||
if ((names == null) || (names.length == 0)) {
|
||||
return new String[0];
|
||||
} else {
|
||||
return this.names;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of network topology paths for each of the hosts.
|
||||
* The last component of the path is the host.
|
||||
*/
|
||||
public String[] getTopologyPaths() throws IOException {
|
||||
if ((topologyPaths == null) || (topologyPaths.length == 0)) {
|
||||
return new String[0];
|
||||
} else {
|
||||
return this.topologyPaths;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the start offset of file associated with this block
|
||||
*/
|
||||
public long getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the length of the block
|
||||
*/
|
||||
public long getLength() {
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the start offset of file associated with this block
|
||||
*/
|
||||
public void setOffset(long offset) {
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the length of block
|
||||
*/
|
||||
public void setLength(long length) {
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the hosts hosting this block
|
||||
*/
|
||||
public void setHosts(String[] hosts) throws IOException {
|
||||
if (hosts == null) {
|
||||
this.hosts = new String[0];
|
||||
} else {
|
||||
this.hosts = hosts;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the names (host:port) hosting this block
|
||||
*/
|
||||
public void setNames(String[] names) throws IOException {
|
||||
if (names == null) {
|
||||
this.names = new String[0];
|
||||
} else {
|
||||
this.names = names;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the network topology paths of the hosts
|
||||
*/
|
||||
public void setTopologyPaths(String[] topologyPaths) throws IOException {
|
||||
if (topologyPaths == null) {
|
||||
this.topologyPaths = new String[0];
|
||||
} else {
|
||||
this.topologyPaths = topologyPaths;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement write of Writable
|
||||
*/
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeLong(offset);
|
||||
out.writeLong(length);
|
||||
out.writeInt(names.length);
|
||||
for (int i=0; i < names.length; i++) {
|
||||
Text name = new Text(names[i]);
|
||||
name.write(out);
|
||||
}
|
||||
out.writeInt(hosts.length);
|
||||
for (int i=0; i < hosts.length; i++) {
|
||||
Text host = new Text(hosts[i]);
|
||||
host.write(out);
|
||||
}
|
||||
out.writeInt(topologyPaths.length);
|
||||
for (int i=0; i < topologyPaths.length; i++) {
|
||||
Text host = new Text(topologyPaths[i]);
|
||||
host.write(out);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement readFields of Writable
|
||||
*/
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.offset = in.readLong();
|
||||
this.length = in.readLong();
|
||||
int numNames = in.readInt();
|
||||
this.names = new String[numNames];
|
||||
for (int i = 0; i < numNames; i++) {
|
||||
Text name = new Text();
|
||||
name.readFields(in);
|
||||
names[i] = name.toString();
|
||||
}
|
||||
int numHosts = in.readInt();
|
||||
for (int i = 0; i < numHosts; i++) {
|
||||
Text host = new Text();
|
||||
host.readFields(in);
|
||||
hosts[i] = host.toString();
|
||||
}
|
||||
int numTops = in.readInt();
|
||||
Text path = new Text();
|
||||
for (int i = 0; i < numTops; i++) {
|
||||
path.readFields(in);
|
||||
topologyPaths[i] = path.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder();
|
||||
result.append(offset);
|
||||
result.append(',');
|
||||
result.append(length);
|
||||
for(String h: hosts) {
|
||||
result.append(',');
|
||||
result.append(h);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
 * A {@link BufferedInputStream} wrapper around an {@link FSInputStream} that
 * adds buffering while still exposing seek and positioned-read operations.
 *
 * <p>Sequential reads go through the inherited buffer; the positioned reads
 * ({@link PositionedReadable}) below bypass the buffer and delegate directly
 * to the underlying stream.
 */
public class BufferedFSInputStream extends BufferedInputStream
implements Seekable, PositionedReadable {
  /**
   * Creates a <code>BufferedFSInputStream</code>
   * with the specified buffer size,
   * and saves its argument, the input stream
   * <code>in</code>, for later use. An internal
   * buffer array of length <code>size</code>
   * is created and stored in <code>buf</code>.
   *
   * @param in the underlying input stream.
   * @param size the buffer size.
   * @exception IllegalArgumentException if size <= 0.
   */
  public BufferedFSInputStream(FSInputStream in, int size) {
    super(in, size);
  }

  // Logical position = underlying stream position minus the bytes sitting in
  // the buffer that have not been consumed yet (count - pos are inherited
  // BufferedInputStream fields).
  public long getPos() throws IOException {
    return ((FSInputStream)in).getPos()-(count-pos);
  }

  // Skip by seeking forward; non-positive n is a no-op returning 0.
  // NOTE(review): always reports n bytes skipped without clamping at EOF —
  // presumably the underlying seek tolerates or rejects past-end positions;
  // confirm against the concrete FSInputStream implementation.
  public long skip(long n) throws IOException {
    if (n <= 0) {
      return 0;
    }

    seek(getPos()+n);
    return n;
  }

  public void seek(long pos) throws IOException {
    if( pos<0 ) {
      // negative targets are silently ignored rather than rejected
      return;
    }
    // optimize: check if the pos is in the buffer
    long end = ((FSInputStream)in).getPos();
    long start = end - count;
    if( pos>=start && pos<end) {
      // target is buffered — just move the buffer cursor, no I/O needed
      this.pos = (int)(pos-start);
      return;
    }

    // invalidate buffer
    this.pos = 0;
    this.count = 0;

    ((FSInputStream)in).seek(pos);
  }

  // Drop any buffered data, then delegate failover to the raw stream.
  public boolean seekToNewSource(long targetPos) throws IOException {
    pos = 0;
    count = 0;
    return ((FSInputStream)in).seekToNewSource(targetPos);
  }

  // Positioned read: bypasses the buffer entirely; does not move getPos().
  public int read(long position, byte[] buffer, int offset, int length) throws IOException {
    return ((FSInputStream)in).read(position, buffer, offset, length) ;
  }

  // Positioned readFully: bypasses the buffer entirely.
  public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
    ((FSInputStream)in).readFully(position, buffer, offset, length);
  }

  // Positioned readFully over the whole buffer array.
  public void readFully(long position, byte[] buffer) throws IOException {
    ((FSInputStream)in).readFully(position, buffer);
  }
}
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Thrown when checksum verification fails or a checksum file is invalid. */
public class ChecksumException extends IOException {
  private static final long serialVersionUID = 1L;

  // Byte offset in the data stream at which the error was detected.
  private long pos;

  /**
   * @param description human-readable description of the failure
   * @param pos byte offset of the corrupt chunk in the data file
   */
  public ChecksumException(String description, long pos) {
    super(description);
    this.pos = pos;
  }

  /** @return the byte offset at which the checksum error occurred */
  public long getPos() {
    return pos;
  }
}
|
|
@ -0,0 +1,547 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/****************************************************************
 * Abstract Checksumed FileSystem.
 * It provides a basic implementation of a Checksumed FileSystem,
 * which creates a checksum file for each raw file.
 * It generates & verifies checksums at the client side.
 * Checksum files live beside their data files as ".&lt;name&gt;.crc".
 *****************************************************************/
public abstract class ChecksumFileSystem extends FilterFileSystem {
  // Magic header of every checksum file: 'c','r','c',0.
  private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
  // Data bytes covered by each CRC32 entry; overridden from config in setConf().
  private int bytesPerChecksum = 512;
  // Whether reads verify data against the checksum file.
  private boolean verifyChecksum = true;

  /** Approximate checksum-file size as a fraction of the data size. */
  public static double getApproxChkSumLength(long size) {
    return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
  }

  public ChecksumFileSystem(FileSystem fs) {
    super(fs);
  }

  /** Pick up "io.bytes.per.checksum" from the configuration, if any. */
  public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
      bytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
    }
  }

  /**
   * Set whether to verify checksum.
   */
  public void setVerifyChecksum(boolean verifyChecksum) {
    this.verifyChecksum = verifyChecksum;
  }

  /** get the raw file system */
  public FileSystem getRawFileSystem() {
    return fs;
  }

  /** Return the name of the checksum file associated with a file.*/
  public Path getChecksumFile(Path file) {
    return new Path(file.getParent(), "." + file.getName() + ".crc");
  }

  /** Return true iff file is a checksum file name.*/
  public static boolean isChecksumFile(Path file) {
    String name = file.getName();
    return name.startsWith(".") && name.endsWith(".crc");
  }

  /** Return the length of the checksum file given the size of the
   * actual file.
   **/
  public long getChecksumFileLength(Path file, long fileSize) {
    return getChecksumLength(fileSize, getBytesPerSum());
  }

  /** Return the bytes Per Checksum */
  public int getBytesPerSum() {
    return bytesPerChecksum;
  }

  // Buffer size for the checksum stream: at least bytesPerSum, scaled down
  // from the data buffer size, but never below the configured default.
  private int getSumBufferSize(int bytesPerSum, int bufferSize) {
    int defaultBufferSize = getConf().getInt("io.file.buffer.size", 4096);
    int proportionalBufferSize = bufferSize / bytesPerSum;
    return Math.max(bytesPerSum,
                    Math.max(proportionalBufferSize, defaultBufferSize));
  }

  /*******************************************************
   * For open()'s FSInputStream
   * It verifies that data matches checksums.
   *******************************************************/
  private static class ChecksumFSInputChecker extends FSInputChecker {
    public static final Log LOG
      = LogFactory.getLog(FSInputChecker.class);

    private ChecksumFileSystem fs;
    private FSDataInputStream datas;   // raw data stream
    private FSDataInputStream sums;    // checksum stream; null if no .crc file

    // Checksum-file header: 4 magic bytes + 4-byte bytesPerSum int.
    private static final int HEADER_LENGTH = 8;

    private int bytesPerSum = 1;
    private long fileLen = -1L;        // lazily cached data-file length

    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
      throws IOException {
      this(fs, file, fs.getConf().getInt("io.file.buffer.size", 4096));
    }

    /**
     * Open the data file and, if present and well-formed, its checksum file.
     * A missing or unreadable checksum file disables verification rather
     * than failing the open.
     */
    public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
      throws IOException {
      super( file, fs.getFileStatus(file).getReplication() );
      this.datas = fs.getRawFileSystem().open(file, bufferSize);
      this.fs = fs;
      Path sumFile = fs.getChecksumFile(file);
      try {
        int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize);
        sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);

        byte[] version = new byte[CHECKSUM_VERSION.length];
        sums.readFully(version);
        if (!Arrays.equals(version, CHECKSUM_VERSION))
          throw new IOException("Not a checksum file: "+sumFile);
        this.bytesPerSum = sums.readInt();
        set(fs.verifyChecksum, new CRC32(), bytesPerSum, 4);
      } catch (FileNotFoundException e) {         // quietly ignore
        set(fs.verifyChecksum, null, 1, 0);
      } catch (IOException e) {                   // loudly ignore
        LOG.warn("Problem opening checksum file: "+ file +
                 ".  Ignoring exception: " +
                 StringUtils.stringifyException(e));
        set(fs.verifyChecksum, null, 1, 0);
      }
    }

    // Byte offset in the checksum file for a given data offset:
    // header + one 4-byte CRC per bytesPerSum-sized data chunk.
    private long getChecksumFilePos( long dataPos ) {
      return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
    }

    // Start of the chunk containing dataPos (round down to chunk boundary).
    protected long getChunkPosition( long dataPos ) {
      return dataPos/bytesPerSum*bytesPerSum;
    }

    public int available() throws IOException {
      return datas.available() + super.available();
    }

    /**
     * Positioned read: opens a fresh checker so the sequential read state
     * of this stream is not disturbed.
     */
    public int read(long position, byte[] b, int off, int len)
      throws IOException {
      // parameter check
      if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
        throw new IndexOutOfBoundsException();
      } else if (len == 0) {
        return 0;
      }
      if( position<0 ) {
        throw new IllegalArgumentException(
            "Parameter position can not to be negative");
      }

      ChecksumFSInputChecker checker = new ChecksumFSInputChecker(fs, file);
      checker.seek(position);
      int nread = checker.read(b, off, len);
      checker.close();
      return nread;
    }

    public void close() throws IOException {
      datas.close();
      if( sums != null ) {
        sums.close();
      }
      // reset checker state so further use cannot verify against closed streams
      set(fs.verifyChecksum, null, 1, 0);
    }


    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
      long sumsPos = getChecksumFilePos(targetPos);
      fs.reportChecksumFailure(file, datas, targetPos, sums, sumsPos);
      boolean newDataSource = datas.seekToNewSource(targetPos);
      return sums.seekToNewSource(sumsPos) || newDataSource;
    }

    /**
     * Read one chunk of data plus its stored checksum. If the checksum file
     * hits EOF but data remains, the files disagree in length — reported as
     * a ChecksumException.
     */
    @Override
    protected int readChunk(long pos, byte[] buf, int offset, int len,
        byte[] checksum) throws IOException {
      boolean eof = false;
      if(needChecksum()) {
        try {
          long checksumPos = getChecksumFilePos(pos);
          if(checksumPos != sums.getPos()) {
            sums.seek(checksumPos);
          }
          sums.readFully(checksum);
        } catch (EOFException e) {
          eof = true;
        }
        len = bytesPerSum;
      }
      if(pos != datas.getPos()) {
        datas.seek(pos);
      }
      int nread = readFully(datas, buf, offset, len);
      if( eof && nread > 0) {
        throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
      }
      return nread;
    }

    /* Return the file length (cached after the first lookup) */
    private long getFileLength() throws IOException {
      if( fileLen==-1L ) {
        fileLen = fs.getContentSummary(file).getLength();
      }
      return fileLen;
    }

    /**
     * Skips over and discards <code>n</code> bytes of data from the
     * input stream.
     *
     * The <code>skip</code> method skips over some smaller number of bytes
     * when reaching end of file before <code>n</code> bytes have been skipped.
     * The actual number of bytes skipped is returned. If <code>n</code> is
     * negative, no bytes are skipped.
     *
     * @param n the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @exception IOException if an I/O error occurs.
     *            ChecksumException if the chunk to skip to is corrupted
     */
    public synchronized long skip(long n) throws IOException {
      long curPos = getPos();
      long fileLength = getFileLength();
      if( n+curPos > fileLength ) {
        n = fileLength - curPos;   // clamp so we never skip past EOF
      }
      return super.skip(n);
    }

    /**
     * Seek to the given position in the stream.
     * The next read() will be from that position.
     *
     * <p>This method does not allow seek past the end of the file.
     * This produces IOException.
     *
     * @param pos the postion to seek to.
     * @exception IOException if an I/O error occurs or seeks after EOF
     *            ChecksumException if the chunk to seek to is corrupted
     */

    public synchronized void seek(long pos) throws IOException {
      if(pos>getFileLength()) {
        throw new IOException("Cannot seek after EOF");
      }
      super.seek(pos);
    }

  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * @param f the file name to open
   * @param bufferSize the size of the buffer to be used.
   */
  @Override
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    return new FSDataInputStream(
        new ChecksumFSInputChecker(this, f, bufferSize));
  }

  /** {@inheritDoc} */
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException {
    // append would invalidate the existing per-chunk checksums
    throw new IOException("Not supported");
  }

  /**
   * Calculated the length of the checksum file in bytes.
   * @param size the length of the data file in bytes
   * @param bytesPerSum the number of bytes in a checksum block
   * @return the number of bytes in the checksum file
   */
  public static long getChecksumLength(long size, int bytesPerSum) {
    //the checksum length is equal to size passed divided by bytesPerSum +
    //bytes written in the beginning of the checksum file.
    return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
             CHECKSUM_VERSION.length + 4;
  }

  /** This class provides an output stream for a checksummed file.
   * It generates checksums for data. */
  private static class ChecksumFSOutputSummer extends FSOutputSummer {
    private FSDataOutputStream datas;   // raw data output
    private FSDataOutputStream sums;    // checksum output (.crc file)
    // Rough checksum-file overhead used by getApproxChkSumLength().
    private static final float CHKSUM_AS_FRACTION = 0.01f;

    public ChecksumFSOutputSummer(ChecksumFileSystem fs,
                          Path file,
                          boolean overwrite,
                          short replication,
                          long blockSize,
                          Configuration conf)
      throws IOException {
      this(fs, file, overwrite,
           conf.getInt("io.file.buffer.size", 4096),
           replication, blockSize, null);
    }

    /**
     * Create the data file and its checksum file; the checksum file is
     * always overwritten and starts with the magic header + bytesPerSum.
     */
    public ChecksumFSOutputSummer(ChecksumFileSystem fs,
                          Path file,
                          boolean overwrite,
                          int bufferSize,
                          short replication,
                          long blockSize,
                          Progressable progress)
      throws IOException {
      super(new CRC32(), fs.getBytesPerSum(), 4);
      int bytesPerSum = fs.getBytesPerSum();
      this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize,
                                         replication, blockSize, progress);
      int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
      this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true,
                                               sumBufferSize, replication,
                                               blockSize);
      sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
      sums.writeInt(bytesPerSum);
    }

    public void close() throws IOException {
      flushBuffer();   // push any partial chunk (and its checksum) out first
      sums.close();
      datas.close();
    }

    @Override
    protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
    throws IOException {
      datas.write(b, offset, len);
      sums.write(checksum);
    }
  }

  /** {@inheritDoc} */
  @Override
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    Path parent = f.getParent();
    if (parent != null && !mkdirs(parent)) {
      throw new IOException("Mkdirs failed to create " + parent);
    }
    final FSDataOutputStream out = new FSDataOutputStream(
        new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
            blockSize, progress), null);
    if (permission != null) {
      setPermission(f, permission);
    }
    return out;
  }

  /**
   * Set replication for an existing file.
   * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>
   * Also applies the new replication to the checksum file, if one exists.
   * @param src file name
   * @param replication new replication
   * @throws IOException
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  public boolean setReplication(Path src, short replication) throws IOException {
    boolean value = fs.setReplication(src, replication);
    if (!value)
      return false;

    Path checkFile = getChecksumFile(src);
    if (exists(checkFile))
      fs.setReplication(checkFile, replication);

    return true;
  }

  /**
   * Rename files/dirs. For a file, the checksum file (if any) is renamed
   * alongside it; directories carry their .crc files implicitly.
   */
  public boolean rename(Path src, Path dst) throws IOException {
    if (fs.isDirectory(src)) {
      return fs.rename(src, dst);
    } else {

      boolean value = fs.rename(src, dst);
      if (!value)
        return false;

      Path checkFile = getChecksumFile(src);
      if (fs.exists(checkFile)) { //try to rename checksum
        if (fs.isDirectory(dst)) {
          value = fs.rename(checkFile, dst);
        } else {
          value = fs.rename(checkFile, getChecksumFile(dst));
        }
      }

      return value;
    }
  }

  /**
   * Implement the delete(Path, boolean) in checksum
   * file system. Deletes the checksum file of a plain file along with it.
   */
  public boolean delete(Path f, boolean recursive) throws IOException{
    FileStatus fstatus = null;
    try {
      fstatus = fs.getFileStatus(f);
    } catch(FileNotFoundException e) {
      return false;
    }
    if(fstatus.isDir()) {
      //this works since the crcs are in the same
      //directories and the files. so we just delete
      //everything in the underlying filesystem
      return fs.delete(f, recursive);
    } else {
      Path checkFile = getChecksumFile(f);
      if (fs.exists(checkFile)) {
        fs.delete(checkFile, true);
      }
      return fs.delete(f, true);
    }
  }

  // Filter that hides checksum files from directory listings.
  final private static PathFilter DEFAULT_FILTER = new PathFilter() {
    public boolean accept(Path file) {
      return !isChecksumFile(file);
    }
  };

  /**
   * List the statuses of the files/directories in the given path if the path is
   * a directory. Checksum files are excluded from the listing.
   *
   * @param f
   *          given path
   * @return the statuses of the files/directories in the given patch
   * @throws IOException
   */
  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    return fs.listStatus(f, DEFAULT_FILTER);
  }

  @Override
  public boolean mkdirs(Path f) throws IOException {
    return fs.mkdirs(f);
  }

  @Override
  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    Configuration conf = getConf();
    FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf);
  }

  /**
   * The src file is under FS, and the dst is on the local disk.
   * Copy it from FS control to the local dst name.
   */
  @Override
  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    Configuration conf = getConf();
    FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf);
  }

  /**
   * The src file is under FS, and the dst is on the local disk.
   * Copy it from FS control to the local dst name.
   * If src and dst are directories, the copyCrc parameter
   * determines whether to copy CRC files.
   */
  public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
    throws IOException {
    if (!fs.isDirectory(src)) { // source is a file
      fs.copyToLocalFile(src, dst);
      FileSystem localFs = getLocal(getConf()).getRawFileSystem();
      if (localFs.isDirectory(dst)) {
        dst = new Path(dst, src.getName());
      }
      dst = getChecksumFile(dst);
      if (localFs.exists(dst)) { //remove old local checksum file
        localFs.delete(dst, true);
      }
      Path checksumFile = getChecksumFile(src);
      if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
        fs.copyToLocalFile(checksumFile, dst);
      }
    } else {
      // directory: recurse, propagating the copyCrc choice
      FileStatus[] srcs = listStatus(src);
      for (FileStatus srcFile : srcs) {
        copyToLocalFile(srcFile.getPath(),
                        new Path(dst, srcFile.getPath().getName()), copyCrc);
      }
    }
  }

  @Override
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    return tmpLocalFile;
  }

  @Override
  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    moveFromLocalFile(tmpLocalFile, fsOutputFile);
  }

  /**
   * Report a checksum error to the file system.
   * Default implementation does nothing; subclasses may override to
   * quarantine bad replicas.
   * @param f the file name containing the error
   * @param in the stream open on the file
   * @param inPos the position of the beginning of the bad data in the file
   * @param sums the stream open on the checksum file
   * @param sumsPos the position of the beginning of the bad data in the checksum file
   * @return if retry is neccessary
   */
  public boolean reportChecksumFailure(Path f, FSDataInputStream in,
      long inPos, FSDataInputStream sums, long sumsPos) {
    return false;
  }
}
|
|
@ -0,0 +1,164 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/** Store the summary of a content (a directory or a file). */
|
||||
public class ContentSummary implements Writable{
|
||||
private long length;
|
||||
private long fileCount;
|
||||
private long directoryCount;
|
||||
private long quota;
|
||||
private long spaceConsumed;
|
||||
private long spaceQuota;
|
||||
|
||||
|
||||
/** Constructor */
|
||||
public ContentSummary() {}
|
||||
|
||||
/** Constructor */
|
||||
public ContentSummary(long length, long fileCount, long directoryCount) {
|
||||
this(length, fileCount, directoryCount, -1L, length, -1L);
|
||||
}
|
||||
|
||||
/** Constructor */
|
||||
public ContentSummary(
|
||||
long length, long fileCount, long directoryCount, long quota,
|
||||
long spaceConsumed, long spaceQuota) {
|
||||
this.length = length;
|
||||
this.fileCount = fileCount;
|
||||
this.directoryCount = directoryCount;
|
||||
this.quota = quota;
|
||||
this.spaceConsumed = spaceConsumed;
|
||||
this.spaceQuota = spaceQuota;
|
||||
}
|
||||
|
||||
/** @return the length */
|
||||
public long getLength() {return length;}
|
||||
|
||||
/** @return the directory count */
|
||||
public long getDirectoryCount() {return directoryCount;}
|
||||
|
||||
/** @return the file count */
|
||||
public long getFileCount() {return fileCount;}
|
||||
|
||||
/** Return the directory quota */
|
||||
public long getQuota() {return quota;}
|
||||
|
||||
/** Retuns (disk) space consumed */
|
||||
public long getSpaceConsumed() {return spaceConsumed;}
|
||||
|
||||
/** Returns (disk) space quota */
|
||||
public long getSpaceQuota() {return spaceQuota;}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeLong(length);
|
||||
out.writeLong(fileCount);
|
||||
out.writeLong(directoryCount);
|
||||
out.writeLong(quota);
|
||||
out.writeLong(spaceConsumed);
|
||||
out.writeLong(spaceQuota);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.length = in.readLong();
|
||||
this.fileCount = in.readLong();
|
||||
this.directoryCount = in.readLong();
|
||||
this.quota = in.readLong();
|
||||
this.spaceConsumed = in.readLong();
|
||||
this.spaceQuota = in.readLong();
|
||||
}
|
||||
|
||||
/**
|
||||
* Output format:
|
||||
* <----12----> <----12----> <-------18------->
|
||||
* DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME
|
||||
*/
|
||||
private static final String STRING_FORMAT = "%12d %12d %18d ";
|
||||
/**
|
||||
* Output format:
|
||||
* <----12----> <----15----> <----15----> <----15----> <----12----> <----12----> <-------18------->
|
||||
* QUOTA REMAINING_QUATA SPACE_QUOTA SPACE_QUOTA_REM DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME
|
||||
*/
|
||||
private static final String QUOTA_STRING_FORMAT = "%12s %15s ";
|
||||
private static final String SPACE_QUOTA_STRING_FORMAT = "%15s %15s ";
|
||||
|
||||
/** The header string */
|
||||
private static final String HEADER = String.format(
|
||||
STRING_FORMAT.replace('d', 's'), "directories", "files", "bytes");
|
||||
|
||||
private static final String QUOTA_HEADER = String.format(
|
||||
QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT,
|
||||
"quota", "remaining quota", "space quota", "reamaining quota") +
|
||||
HEADER;
|
||||
|
||||
/** Return the header of the output.
|
||||
* if qOption is false, output directory count, file count, and content size;
|
||||
* if qOption is true, output quota and remaining quota as well.
|
||||
*
|
||||
* @param qOption a flag indicating if quota needs to be printed or not
|
||||
* @return the header of the output
|
||||
*/
|
||||
public static String getHeader(boolean qOption) {
|
||||
return qOption ? QUOTA_HEADER : HEADER;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String toString() {
|
||||
return toString(true);
|
||||
}
|
||||
|
||||
/** Return the string representation of the object in the output format.
|
||||
* if qOption is false, output directory count, file count, and content size;
|
||||
* if qOption is true, output quota and remaining quota as well.
|
||||
*
|
||||
* @param qOption a flag indicating if quota needs to be printed or not
|
||||
* @return the string representation of the object
|
||||
*/
|
||||
public String toString(boolean qOption) {
|
||||
String prefix = "";
|
||||
if (qOption) {
|
||||
String quotaStr = "none";
|
||||
String quotaRem = "inf";
|
||||
String spaceQuotaStr = "none";
|
||||
String spaceQuotaRem = "inf";
|
||||
|
||||
if (quota>0) {
|
||||
quotaStr = Long.toString(quota);
|
||||
quotaRem = Long.toString(quota-(directoryCount+fileCount));
|
||||
}
|
||||
if (spaceQuota>0) {
|
||||
spaceQuotaStr = Long.toString(spaceQuota);
|
||||
spaceQuotaRem = Long.toString(spaceQuota - spaceConsumed);
|
||||
}
|
||||
|
||||
prefix = String.format(QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT,
|
||||
quotaStr, quotaRem, spaceQuotaStr, spaceQuotaRem);
|
||||
}
|
||||
|
||||
return prefix + String.format(STRING_FORMAT, directoryCount,
|
||||
fileCount, length);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.BufferedReader;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
|
||||
/** Filesystem disk space usage statistics. Uses the unix 'df' program.
|
||||
* Tested on Linux, FreeBSD, Cygwin. */
|
||||
public class DF extends Shell {
  /** Default refresh interval of the cached df output: 3 seconds, in ms. */
  public static final long DF_INTERVAL_DEFAULT = 3 * 1000; // default DF refresh interval

  private String dirPath;     // canonical path whose filesystem is measured
  private String filesystem;  // filesystem/device name: first token of df output
  private long capacity;      // total size in bytes (df -k reports KB; scaled by 1024)
  private long used;          // bytes used
  private long available;     // bytes available
  private int percentUsed;    // use percentage as printed by df ('%' stripped by tokenizer)
  private String mount;       // mount point of the filesystem

  /** OS families whose df output layout we know, matched by substring
   *  against the "os.name" system property. */
  enum OSType {
    OS_TYPE_UNIX("UNIX"),
    OS_TYPE_WIN("Windows"),
    OS_TYPE_SOLARIS("SunOS"),
    OS_TYPE_MAC("Mac"),
    OS_TYPE_AIX("AIX");

    // substring of "os.name" that identifies this OS family
    private String id;

    OSType(String id) {
      this.id = id;
    }

    /** @return true if the given os.name value denotes this OS family */
    public boolean match(String osStr) {
      return osStr != null && osStr.indexOf(id) >= 0;
    }

    String getId() {
      return id;
    }
  }

  private static final String OS_NAME = System.getProperty("os.name");
  private static final OSType OS_TYPE = getOSType(OS_NAME);

  /** Map an "os.name" value to an OSType; unknown names default to UNIX. */
  protected static OSType getOSType(String osName) {
    for (OSType ost : EnumSet.allOf(OSType.class)) {
      if (ost.match(osName)) {
        return ost;
      }
    }
    return OSType.OS_TYPE_UNIX;
  }

  /**
   * Create a DF for the given path; the refresh interval (ms) is read from
   * the "dfs.df.interval" configuration key, defaulting to
   * {@link #DF_INTERVAL_DEFAULT}.
   */
  public DF(File path, Configuration conf) throws IOException {
    this(path, conf.getLong("dfs.df.interval", DF.DF_INTERVAL_DEFAULT));
  }

  /**
   * Create a DF for the given path, re-running df at most every
   * dfInterval ms (the throttling is done by the Shell superclass).
   */
  public DF(File path, long dfInterval) throws IOException {
    super(dfInterval);
    this.dirPath = path.getCanonicalPath();
  }

  /** @return the OS family detected at class-load time */
  protected OSType getOSType() {
    return OS_TYPE;
  }

  /// ACCESSORS

  /** @return the canonical path this DF was created with */
  public String getDirPath() {
    return dirPath;
  }

  /** @return the filesystem name, after refreshing the cached df output */
  public String getFilesystem() throws IOException {
    run();
    return filesystem;
  }

  /** @return total capacity in bytes, after refreshing the cached df output */
  public long getCapacity() throws IOException {
    run();
    return capacity;
  }

  /** @return bytes used, after refreshing the cached df output */
  public long getUsed() throws IOException {
    run();
    return used;
  }

  /** @return bytes available, after refreshing the cached df output */
  public long getAvailable() throws IOException {
    run();
    return available;
  }

  /** @return use percentage as reported by df, after a refresh */
  public int getPercentUsed() throws IOException {
    run();
    return percentUsed;
  }

  /** @return the mount point, after refreshing the cached df output */
  public String getMount() throws IOException {
    run();
    return mount;
  }

  /** Renders the cached values in a df-like layout (sizes back in KB). */
  public String toString() {
    return
      "df -k " + mount +"\n" +
      filesystem + "\t" +
      capacity / 1024 + "\t" +
      used / 1024 + "\t" +
      available / 1024 + "\t" +
      percentUsed + "%\t" +
      mount;
  }

  @Override
  protected String[] getExecString() {
    // ignoring stderr since the exit code is enough
    return new String[] {"bash","-c","exec 'df' '-k' '" + dirPath
                         + "' 2>/dev/null"};
  }

  /**
   * Parse one "df -k" report: skip the header, read the data line
   * (wrapping onto a second line when the filesystem name is too long),
   * and extract the columns according to the OS-specific layout.
   */
  @Override
  protected void parseExecResult(BufferedReader lines) throws IOException {
    lines.readLine();                         // skip headings

    String line = lines.readLine();
    if (line == null) {
      throw new IOException( "Expecting a line not the end of stream" );
    }
    // '%' is a delimiter so the use-percentage parses as a plain int
    StringTokenizer tokens =
      new StringTokenizer(line, " \t\n\r\f%");

    this.filesystem = tokens.nextToken();
    if (!tokens.hasMoreTokens()) {            // for long filesystem name
      line = lines.readLine();
      if (line == null) {
        throw new IOException( "Expecting a line not the end of stream" );
      }
      tokens = new StringTokenizer(line, " \t\n\r\f%");
    }

    switch(getOSType()) {
      case OS_TYPE_AIX:
        // AIX layout: capacity, free, %used, then two columns we skip
        // (presumably inode stats -- confirm against AIX df), then mount.
        // There is no "used" column, so derive it from capacity - available.
        this.capacity = Long.parseLong(tokens.nextToken()) * 1024;
        this.available = Long.parseLong(tokens.nextToken()) * 1024;
        this.percentUsed = Integer.parseInt(tokens.nextToken());
        tokens.nextToken();
        tokens.nextToken();
        this.mount = tokens.nextToken();
        this.used = this.capacity - this.available;
        break;

      case OS_TYPE_WIN:
      case OS_TYPE_SOLARIS:
      case OS_TYPE_MAC:
      case OS_TYPE_UNIX:
      default:
        // Common layout: capacity, used, available, %used, mount point.
        this.capacity = Long.parseLong(tokens.nextToken()) * 1024;
        this.used = Long.parseLong(tokens.nextToken()) * 1024;
        this.available = Long.parseLong(tokens.nextToken()) * 1024;
        this.percentUsed = Integer.parseInt(tokens.nextToken());
        this.mount = tokens.nextToken();
        break;
   }
  }

  /** CLI entry point: report disk usage of args[0] (default "."). */
  public static void main(String[] args) throws Exception {
    String path = ".";
    if (args.length > 0)
      path = args[0];

    System.out.println(new DF(new File(path), DF_INTERVAL_DEFAULT).toString());
  }
}
|
|
@ -0,0 +1,198 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/** Filesystem disk space usage statistics. Uses the unix 'du' program*/
|
||||
public class DU extends Shell {
|
||||
private String dirPath;
|
||||
|
||||
private AtomicLong used = new AtomicLong();
|
||||
private volatile boolean shouldRun = true;
|
||||
private Thread refreshUsed;
|
||||
private IOException duException = null;
|
||||
private long refreshInterval;
|
||||
|
||||
/**
|
||||
* Keeps track of disk usage.
|
||||
* @param path the path to check disk usage in
|
||||
* @param interval refresh the disk usage at this interval
|
||||
* @throws IOException if we fail to refresh the disk usage
|
||||
*/
|
||||
public DU(File path, long interval) throws IOException {
|
||||
super(0);
|
||||
|
||||
//we set the Shell interval to 0 so it will always run our command
|
||||
//and use this one to set the thread sleep interval
|
||||
this.refreshInterval = interval;
|
||||
this.dirPath = path.getCanonicalPath();
|
||||
|
||||
//populate the used variable
|
||||
run();
|
||||
}
|
||||
|
||||
/**
|
||||
* Keeps track of disk usage.
|
||||
* @param path the path to check disk usage in
|
||||
* @param conf configuration object
|
||||
* @throws IOException if we fail to refresh the disk usage
|
||||
*/
|
||||
public DU(File path, Configuration conf) throws IOException {
|
||||
this(path, 600000L);
|
||||
//10 minutes default refresh interval
|
||||
}
|
||||
|
||||
/**
|
||||
* This thread refreshes the "used" variable.
|
||||
*
|
||||
* Future improvements could be to not permanently
|
||||
* run this thread, instead run when getUsed is called.
|
||||
**/
|
||||
class DURefreshThread implements Runnable {
|
||||
|
||||
public void run() {
|
||||
|
||||
while(shouldRun) {
|
||||
|
||||
try {
|
||||
Thread.sleep(refreshInterval);
|
||||
|
||||
try {
|
||||
//update the used variable
|
||||
DU.this.run();
|
||||
} catch (IOException e) {
|
||||
synchronized (DU.this) {
|
||||
//save the latest exception so we can return it in getUsed()
|
||||
duException = e;
|
||||
}
|
||||
|
||||
LOG.warn("Could not get disk usage information", e);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decrease how much disk space we use.
|
||||
* @param value decrease by this value
|
||||
*/
|
||||
public void decDfsUsed(long value) {
|
||||
used.addAndGet(-value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increase how much disk space we use.
|
||||
* @param value increase by this value
|
||||
*/
|
||||
public void incDfsUsed(long value) {
|
||||
used.addAndGet(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return disk space used
|
||||
* @throws IOException if the shell command fails
|
||||
*/
|
||||
public long getUsed() throws IOException {
|
||||
//if the updating thread isn't started, update on demand
|
||||
if(refreshUsed == null) {
|
||||
run();
|
||||
} else {
|
||||
synchronized (DU.this) {
|
||||
//if an exception was thrown in the last run, rethrow
|
||||
if(duException != null) {
|
||||
IOException tmp = duException;
|
||||
duException = null;
|
||||
throw tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return used.longValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the path of which we're keeping track of disk usage
|
||||
*/
|
||||
public String getDirPath() {
|
||||
return dirPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the disk usage checking thread.
|
||||
*/
|
||||
public void start() {
|
||||
//only start the thread if the interval is sane
|
||||
if(refreshInterval > 0) {
|
||||
refreshUsed = new Thread(new DURefreshThread(),
|
||||
"refreshUsed-"+dirPath);
|
||||
refreshUsed.setDaemon(true);
|
||||
refreshUsed.start();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Shut down the refreshing thread.
|
||||
*/
|
||||
public void shutdown() {
|
||||
this.shouldRun = false;
|
||||
|
||||
if(this.refreshUsed != null) {
|
||||
this.refreshUsed.interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return
|
||||
"du -sk " + dirPath +"\n" +
|
||||
used + "\t" + dirPath;
|
||||
}
|
||||
|
||||
protected String[] getExecString() {
|
||||
return new String[] {"du", "-sk", dirPath};
|
||||
}
|
||||
|
||||
protected void parseExecResult(BufferedReader lines) throws IOException {
|
||||
String line = lines.readLine();
|
||||
if (line == null) {
|
||||
throw new IOException("Expecting a line not the end of stream");
|
||||
}
|
||||
String[] tokens = line.split("\t");
|
||||
if(tokens.length == 0) {
|
||||
throw new IOException("Illegal du output");
|
||||
}
|
||||
this.used.set(Long.parseLong(tokens[0])*1024);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String path = ".";
|
||||
if (args.length > 0) {
|
||||
path = args[0];
|
||||
}
|
||||
|
||||
System.out.println(new DU(new File(path), new Configuration()).toString());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** Utility that wraps a {@link FSInputStream} in a {@link DataInputStream}
|
||||
* and buffers input through a {@link BufferedInputStream}. */
|
||||
public class FSDataInputStream extends DataInputStream
|
||||
implements Seekable, PositionedReadable {
|
||||
|
||||
public FSDataInputStream(InputStream in)
|
||||
throws IOException {
|
||||
super(in);
|
||||
if( !(in instanceof Seekable) || !(in instanceof PositionedReadable) ) {
|
||||
throw new IllegalArgumentException(
|
||||
"In is not an instance of Seekable or PositionedReadable");
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void seek(long desired) throws IOException {
|
||||
((Seekable)in).seek(desired);
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
return ((Seekable)in).getPos();
|
||||
}
|
||||
|
||||
public int read(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
return ((PositionedReadable)in).read(position, buffer, offset, length);
|
||||
}
|
||||
|
||||
public void readFully(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
((PositionedReadable)in).readFully(position, buffer, offset, length);
|
||||
}
|
||||
|
||||
public void readFully(long position, byte[] buffer)
|
||||
throws IOException {
|
||||
((PositionedReadable)in).readFully(position, buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
||||
return ((Seekable)in).seekToNewSource(targetPos);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** Utility that wraps a {@link OutputStream} in a {@link DataOutputStream},
|
||||
* buffers output through a {@link BufferedOutputStream} and creates a checksum
|
||||
* file. */
|
||||
public class FSDataOutputStream extends DataOutputStream implements Syncable {
|
||||
private OutputStream wrappedStream;
|
||||
|
||||
private static class PositionCache extends FilterOutputStream {
|
||||
private FileSystem.Statistics statistics;
|
||||
long position;
|
||||
|
||||
public PositionCache(OutputStream out,
|
||||
FileSystem.Statistics stats,
|
||||
long pos) throws IOException {
|
||||
super(out);
|
||||
statistics = stats;
|
||||
position = pos;
|
||||
}
|
||||
|
||||
public void write(int b) throws IOException {
|
||||
out.write(b);
|
||||
position++;
|
||||
if (statistics != null) {
|
||||
statistics.incrementBytesWritten(1);
|
||||
}
|
||||
}
|
||||
|
||||
public void write(byte b[], int off, int len) throws IOException {
|
||||
out.write(b, off, len);
|
||||
position += len; // update position
|
||||
if (statistics != null) {
|
||||
statistics.incrementBytesWritten(len);
|
||||
}
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
return position; // return cached position
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public FSDataOutputStream(OutputStream out) throws IOException {
|
||||
this(out, null);
|
||||
}
|
||||
|
||||
public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats)
|
||||
throws IOException {
|
||||
this(out, stats, 0);
|
||||
}
|
||||
|
||||
public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats,
|
||||
long startPosition) throws IOException {
|
||||
super(new PositionCache(out, stats, startPosition));
|
||||
wrappedStream = out;
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
return ((PositionCache)out).getPos();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
out.close(); // This invokes PositionCache.close()
|
||||
}
|
||||
|
||||
// Returns the underlying output stream. This is used by unit tests.
|
||||
public OutputStream getWrappedStream() {
|
||||
return wrappedStream;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void sync() throws IOException {
|
||||
if (wrappedStream instanceof Syncable) {
|
||||
((Syncable)wrappedStream).sync();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
/** Thrown for unexpected filesystem errors, presumed to reflect disk errors
|
||||
* in the native filesystem. */
|
||||
public class FSError extends Error {
  private static final long serialVersionUID = 1L;

  /**
   * Package-private: only the filesystem layer raises these.
   * @param cause the underlying (presumed disk) error
   */
  FSError(Throwable cause) {
    super(cause);
  }
}
|
|
@ -0,0 +1,432 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* This is a generic input stream for verifying checksums for
|
||||
* data before it is read by a user.
|
||||
*/
|
||||
|
||||
abstract public class FSInputChecker extends FSInputStream {
|
||||
public static final Log LOG
|
||||
= LogFactory.getLog(FSInputChecker.class);
|
||||
|
||||
/** The file name from which data is read from */
|
||||
protected Path file;
|
||||
private Checksum sum;
|
||||
private boolean verifyChecksum = true;
|
||||
private byte[] buf;
|
||||
private byte[] checksum;
|
||||
private int pos;
|
||||
private int count;
|
||||
|
||||
private int numOfRetries;
|
||||
|
||||
// cached file position
|
||||
private long chunkPos = 0;
|
||||
|
||||
/** Constructor
|
||||
*
|
||||
* @param file The name of the file to be read
|
||||
* @param numOfRetries Number of read retries when ChecksumError occurs
|
||||
*/
|
||||
protected FSInputChecker( Path file, int numOfRetries) {
|
||||
this.file = file;
|
||||
this.numOfRetries = numOfRetries;
|
||||
}
|
||||
|
||||
/** Constructor
|
||||
*
|
||||
* @param file The name of the file to be read
|
||||
* @param numOfRetries Number of read retries when ChecksumError occurs
|
||||
* @param sum the type of Checksum engine
|
||||
* @param chunkSize maximun chunk size
|
||||
* @param checksumSize the number byte of each checksum
|
||||
*/
|
||||
protected FSInputChecker( Path file, int numOfRetries,
|
||||
boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize ) {
|
||||
this(file, numOfRetries);
|
||||
set(verifyChecksum, sum, chunkSize, checksumSize);
|
||||
}
|
||||
|
||||
/** Reads in next checksum chunk data into <code>buf</code> at <code>offset</code>
|
||||
* and checksum into <code>checksum</code>.
|
||||
* The method is used for implementing read, therefore, it should be optimized
|
||||
* for sequential reading
|
||||
* @param pos chunkPos
|
||||
* @param buf desitination buffer
|
||||
* @param offset offset in buf at which to store data
|
||||
* @param len maximun number of bytes to read
|
||||
* @return number of bytes read
|
||||
*/
|
||||
abstract protected int readChunk(long pos, byte[] buf, int offset, int len,
|
||||
byte[] checksum) throws IOException;
|
||||
|
||||
/** Return position of beginning of chunk containing pos.
|
||||
*
|
||||
* @param pos a postion in the file
|
||||
* @return the starting position of the chunk which contains the byte
|
||||
*/
|
||||
abstract protected long getChunkPosition(long pos);
|
||||
|
||||
/** Return true if there is a need for checksum verification */
|
||||
protected synchronized boolean needChecksum() {
|
||||
return verifyChecksum && sum != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read one checksum-verified byte
|
||||
*
|
||||
* @return the next byte of data, or <code>-1</code> if the end of the
|
||||
* stream is reached.
|
||||
* @exception IOException if an I/O error occurs.
|
||||
*/
|
||||
|
||||
public synchronized int read() throws IOException {
|
||||
if (pos >= count) {
|
||||
fill();
|
||||
if (pos >= count) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return buf[pos++] & 0xff;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read checksum verified bytes from this byte-input stream into
|
||||
* the specified byte array, starting at the given offset.
|
||||
*
|
||||
* <p> This method implements the general contract of the corresponding
|
||||
* <code>{@link InputStream#read(byte[], int, int) read}</code> method of
|
||||
* the <code>{@link InputStream}</code> class. As an additional
|
||||
* convenience, it attempts to read as many bytes as possible by repeatedly
|
||||
* invoking the <code>read</code> method of the underlying stream. This
|
||||
* iterated <code>read</code> continues until one of the following
|
||||
* conditions becomes true: <ul>
|
||||
*
|
||||
* <li> The specified number of bytes have been read,
|
||||
*
|
||||
* <li> The <code>read</code> method of the underlying stream returns
|
||||
* <code>-1</code>, indicating end-of-file.
|
||||
*
|
||||
* </ul> If the first <code>read</code> on the underlying stream returns
|
||||
* <code>-1</code> to indicate end-of-file then this method returns
|
||||
* <code>-1</code>. Otherwise this method returns the number of bytes
|
||||
* actually read.
|
||||
*
|
||||
* @param b destination buffer.
|
||||
* @param off offset at which to start storing bytes.
|
||||
* @param len maximum number of bytes to read.
|
||||
* @return the number of bytes read, or <code>-1</code> if the end of
|
||||
* the stream has been reached.
|
||||
* @exception IOException if an I/O error occurs.
|
||||
* ChecksumException if any checksum error occurs
|
||||
*/
|
||||
public synchronized int read(byte[] b, int off, int len) throws IOException {
|
||||
// parameter check
|
||||
if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
for (;;) {
|
||||
int nread = read1(b, off + n, len - n);
|
||||
if (nread <= 0)
|
||||
return (n == 0) ? nread : n;
|
||||
n += nread;
|
||||
if (n >= len)
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills the buffer with a chunk data.
|
||||
* No mark is supported.
|
||||
* This method assumes that all data in the buffer has already been read in,
|
||||
* hence pos > count.
|
||||
*/
|
||||
private void fill( ) throws IOException {
|
||||
assert(pos>=count);
|
||||
// fill internal buffer
|
||||
count = readChecksumChunk(buf, 0, buf.length);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read characters into a portion of an array, reading from the underlying
|
||||
* stream at most once if necessary.
|
||||
*/
|
||||
private int read1(byte b[], int off, int len)
|
||||
throws IOException {
|
||||
int avail = count-pos;
|
||||
if( avail <= 0 ) {
|
||||
if(len>=buf.length) {
|
||||
// read a chunk to user buffer directly; avoid one copy
|
||||
int nread = readChecksumChunk(b, off, len);
|
||||
return nread;
|
||||
} else {
|
||||
// read a chunk into the local buffer
|
||||
fill();
|
||||
if( count <= 0 ) {
|
||||
return -1;
|
||||
} else {
|
||||
avail = count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy content of the local buffer to the user buffer
|
||||
int cnt = (avail < len) ? avail : len;
|
||||
System.arraycopy(buf, pos, b, off, cnt);
|
||||
pos += cnt;
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/* Read up one checksum chunk to array <i>b</i> at pos <i>off</i>
|
||||
* It requires a checksum chunk boundary
|
||||
* in between <cur_pos, cur_pos+len>
|
||||
* and it stops reading at the boundary or at the end of the stream;
|
||||
* Otherwise an IllegalArgumentException is thrown.
|
||||
* This makes sure that all data read are checksum verified.
|
||||
*
|
||||
* @param b the buffer into which the data is read.
|
||||
* @param off the start offset in array <code>b</code>
|
||||
* at which the data is written.
|
||||
* @param len the maximum number of bytes to read.
|
||||
* @return the total number of bytes read into the buffer, or
|
||||
* <code>-1</code> if there is no more data because the end of
|
||||
* the stream has been reached.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
private int readChecksumChunk(byte b[], int off, int len)
|
||||
throws IOException {
|
||||
// invalidate buffer
|
||||
count = pos = 0;
|
||||
|
||||
int read = 0;
|
||||
boolean retry = true;
|
||||
int retriesLeft = numOfRetries;
|
||||
do {
|
||||
retriesLeft--;
|
||||
|
||||
try {
|
||||
read = readChunk(chunkPos, b, off, len, checksum);
|
||||
if( read > 0 ) {
|
||||
if( needChecksum() ) {
|
||||
sum.update(b, off, read);
|
||||
verifySum(chunkPos);
|
||||
}
|
||||
chunkPos += read;
|
||||
}
|
||||
retry = false;
|
||||
} catch (ChecksumException ce) {
|
||||
LOG.info("Found checksum error: b[" + off + ", " + (off+read) + "]="
|
||||
+ StringUtils.byteToHexString(b, off, off + read), ce);
|
||||
if (retriesLeft == 0) {
|
||||
throw ce;
|
||||
}
|
||||
|
||||
// try a new replica
|
||||
if (seekToNewSource(chunkPos)) {
|
||||
// Since at least one of the sources is different,
|
||||
// the read might succeed, so we'll retry.
|
||||
seek(chunkPos);
|
||||
} else {
|
||||
// Neither the data stream nor the checksum stream are being read
|
||||
// from different sources, meaning we'll still get a checksum error
|
||||
// if we try to do the read again. We throw an exception instead.
|
||||
throw ce;
|
||||
}
|
||||
}
|
||||
} while (retry);
|
||||
return read;
|
||||
}
|
||||
|
||||
/* verify checksum for the chunk.
|
||||
* @throws ChecksumException if there is a mismatch
|
||||
*/
|
||||
private void verifySum(long errPos) throws ChecksumException {
|
||||
long crc = getChecksum();
|
||||
long sumValue = sum.getValue();
|
||||
sum.reset();
|
||||
if (crc != sumValue) {
|
||||
throw new ChecksumException(
|
||||
"Checksum error: "+file+" at "+errPos, errPos);
|
||||
}
|
||||
}
|
||||
|
||||
/* calculate checksum value */
|
||||
private long getChecksum() {
|
||||
return checksum2long(checksum);
|
||||
}
|
||||
|
||||
/** Convert a checksum byte array to a long */
|
||||
static public long checksum2long(byte[] checksum) {
|
||||
long crc = 0L;
|
||||
for(int i=0; i<checksum.length; i++) {
|
||||
crc |= (0xffL&(long)checksum[i])<<((checksum.length-i-1)*8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long getPos() throws IOException {
|
||||
return chunkPos-(count-pos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int available() throws IOException {
|
||||
return count-pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips over and discards <code>n</code> bytes of data from the
|
||||
* input stream.
|
||||
*
|
||||
* <p>This method may skip more bytes than are remaining in the backing
|
||||
* file. This produces no exception and the number of bytes skipped
|
||||
* may include some number of bytes that were beyond the EOF of the
|
||||
* backing file. Attempting to read from the stream after skipping past
|
||||
* the end will result in -1 indicating the end of the file.
|
||||
*
|
||||
*<p>If <code>n</code> is negative, no bytes are skipped.
|
||||
*
|
||||
* @param n the number of bytes to be skipped.
|
||||
* @return the actual number of bytes skipped.
|
||||
* @exception IOException if an I/O error occurs.
|
||||
* ChecksumException if the chunk to skip to is corrupted
|
||||
*/
|
||||
public synchronized long skip(long n) throws IOException {
|
||||
if (n <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
seek(getPos()+n);
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Seek to the given position in the stream.
|
||||
* The next read() will be from that position.
|
||||
*
|
||||
* <p>This method may seek past the end of the file.
|
||||
* This produces no exception and an attempt to read from
|
||||
* the stream will result in -1 indicating the end of the file.
|
||||
*
|
||||
* @param pos the postion to seek to.
|
||||
* @exception IOException if an I/O error occurs.
|
||||
* ChecksumException if the chunk to seek to is corrupted
|
||||
*/
|
||||
|
||||
public synchronized void seek(long pos) throws IOException {
|
||||
if( pos<0 ) {
|
||||
return;
|
||||
}
|
||||
// optimize: check if the pos is in the buffer
|
||||
long start = chunkPos - this.count;
|
||||
if( pos>=start && pos<chunkPos) {
|
||||
this.pos = (int)(pos-start);
|
||||
return;
|
||||
}
|
||||
|
||||
// reset the current state
|
||||
resetState();
|
||||
|
||||
// seek to a checksum boundary
|
||||
chunkPos = getChunkPosition(pos);
|
||||
|
||||
// scan to the desired position
|
||||
int delta = (int)(pos - chunkPos);
|
||||
if( delta > 0) {
|
||||
readFully(this, new byte[delta], 0, delta);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility function that tries to read up to <code>len</code> bytes from
|
||||
* <code>stm</code>
|
||||
*
|
||||
* @param stm an input stream
|
||||
* @param buf destiniation buffer
|
||||
* @param offset offset at which to store data
|
||||
* @param len number of bytes to read
|
||||
* @return actual number of bytes read
|
||||
* @throws IOException if there is any IO error
|
||||
*/
|
||||
protected static int readFully(InputStream stm,
|
||||
byte[] buf, int offset, int len) throws IOException {
|
||||
int n = 0;
|
||||
for (;;) {
|
||||
int nread = stm.read(buf, offset + n, len - n);
|
||||
if (nread <= 0)
|
||||
return (n == 0) ? nread : n;
|
||||
n += nread;
|
||||
if (n >= len)
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Set the checksum related parameters. Note that this reallocates both
 * internal buffers and resets the read position, discarding any data
 * currently buffered.
 *
 * @param verifyChecksum whether to verify checksum
 * @param sum which type of checksum to use
 * @param maxChunkSize maximum chunk size
 * @param checksumSize checksum size in bytes
 */
final protected synchronized void set(boolean verifyChecksum,
    Checksum sum, int maxChunkSize, int checksumSize ) {
  this.verifyChecksum = verifyChecksum;
  this.sum = sum;
  // One buffer for a full data chunk, one for its serialized checksum.
  this.buf = new byte[maxChunkSize];
  this.checksum = new byte[checksumSize];
  // Invalidate any previously buffered data.
  this.count = 0;
  this.pos = 0;
}
|
||||
|
||||
/** Mark/reset are not supported by this stream; always returns false. */
final public boolean markSupported() {
  return false;
}
|
||||
|
||||
/** No-op: mark is not supported (see {@link #markSupported()}). */
final public void mark(int readlimit) {
}
|
||||
|
||||
/**
 * Always fails: mark/reset are not supported by this stream.
 * @throws IOException unconditionally
 */
final public void reset() throws IOException {
  throw new IOException("mark/reset not supported");
}
|
||||
|
||||
|
||||
/* reset this FSInputChecker's state */
|
||||
private void resetState() {
|
||||
// invalidate buffer
|
||||
count = 0;
|
||||
pos = 0;
|
||||
// reset Checksum
|
||||
if (sum != null) {
|
||||
sum.reset();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/****************************************************************
|
||||
* FSInputStream is a generic old InputStream with a little bit
|
||||
* of RAF-style seek ability.
|
||||
*
|
||||
*****************************************************************/
|
||||
public abstract class FSInputStream extends InputStream
|
||||
implements Seekable, PositionedReadable {
|
||||
/**
|
||||
* Seek to the given offset from the start of the file.
|
||||
* The next read() will be from that location. Can't
|
||||
* seek past the end of the file.
|
||||
*/
|
||||
public abstract void seek(long pos) throws IOException;
|
||||
|
||||
/**
|
||||
* Return the current offset from the start of the file
|
||||
*/
|
||||
public abstract long getPos() throws IOException;
|
||||
|
||||
/**
|
||||
* Seeks a different copy of the data. Returns true if
|
||||
* found a new source, false otherwise.
|
||||
*/
|
||||
public abstract boolean seekToNewSource(long targetPos) throws IOException;
|
||||
|
||||
public int read(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
synchronized (this) {
|
||||
long oldPos = getPos();
|
||||
int nread = -1;
|
||||
try {
|
||||
seek(position);
|
||||
nread = read(buffer, offset, length);
|
||||
} finally {
|
||||
seek(oldPos);
|
||||
}
|
||||
return nread;
|
||||
}
|
||||
}
|
||||
|
||||
public void readFully(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
int nread = 0;
|
||||
while (nread < length) {
|
||||
int nbytes = read(position+nread, buffer, offset+nread, length-nread);
|
||||
if (nbytes < 0) {
|
||||
throw new EOFException("End of file reached before reading fully.");
|
||||
}
|
||||
nread += nbytes;
|
||||
}
|
||||
}
|
||||
|
||||
public void readFully(long position, byte[] buffer)
|
||||
throws IOException {
|
||||
readFully(position, buffer, 0, buffer.length);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,176 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
/**
 * A generic output stream that computes a checksum for each chunk of
 * data before the chunk is handed to the underlying stream.
 */

abstract public class FSOutputSummer extends OutputStream {
  // checksum calculator applied to all outgoing data
  private Checksum sum;
  // staging area holding data until a full chunk has accumulated
  private byte buf[];
  // scratch buffer the serialized checksum is rendered into
  private byte checksum[];
  // number of valid bytes currently staged in buf
  private int count;
  
  protected FSOutputSummer(Checksum sum, int maxChunkSize, int checksumSize) {
    this.sum = sum;
    this.buf = new byte[maxChunkSize];
    this.checksum = new byte[checksumSize];
    this.count = 0;
  }
  
  /* write the data chunk in <code>b</code> starting at <code>offset</code>
   * with a length of <code>len</code>, together with its checksum
   */
  protected abstract void writeChunk(byte[] b, int offset, int len, byte[] checksum)
  throws IOException;
  
  /** Write one byte, flushing the chunk once the staging buffer fills. */
  public synchronized void write(int b) throws IOException {
    sum.update(b);
    buf[count++] = (byte)b;
    if(count == buf.length) {
      flushBuffer();
    }
  }

  /**
   * Writes <code>len</code> bytes from the specified byte array
   * starting at offset <code>off</code> and generate a checksum for
   * each data chunk.
   *
   * <p> Bytes are staged in this stream's buffer until a full checksum
   * chunk has accumulated, at which point the chunk is checksummed and
   * handed to the underlying stream. When the buffer is empty and the
   * remaining request covers at least one whole chunk, the chunk is
   * checksummed and written directly from the caller's array, avoiding
   * an unnecessary copy.
   *
   * @param b the data.
   * @param off the start offset in the data.
   * @param len the number of bytes to write.
   * @exception IOException if an I/O error occurs.
   */
  public synchronized void write(byte b[], int off, int len)
  throws IOException {
    if (off < 0 || len < 0 || off > b.length - len) {
      throw new ArrayIndexOutOfBoundsException();
    }

    int written = 0;
    while (written < len) {
      written += write1(b, off + written, len - written);
    }
  }
  
  /**
   * Write a portion of an array, flushing to the underlying
   * stream at most once if necessary.
   */
  private int write1(byte b[], int off, int len) throws IOException {
    if(count==0 && len>=buf.length) {
      // Fast path: nothing staged and the caller supplied at least a
      // whole chunk -- checksum and emit straight from the caller's array.
      final int chunkLen = buf.length;
      sum.update(b, off, chunkLen);
      writeChecksumChunk(b, off, chunkLen, false);
      return chunkLen;
    }
    
    // Slow path: stage as many bytes as fit into the local buffer.
    int toCopy = Math.min(len, buf.length - count);
    sum.update(b, off, toCopy);
    System.arraycopy(b, off, buf, count, toCopy);
    count += toCopy;
    if (count == buf.length) {
      // staging buffer is full -- emit the chunk
      flushBuffer();
    }
    return toCopy;
  }

  /* Forces any buffered output bytes to be checksummed and written out to
   * the underlying output stream.
   */
  protected synchronized void flushBuffer() throws IOException {
    flushBuffer(false);
  }

  /* Forces any buffered output bytes to be checksummed and written out to
   * the underlying output stream.  If keep is true, then the state of 
   * this object remains intact.
   */
  protected synchronized void flushBuffer(boolean keep) throws IOException {
    if (count == 0) {
      return;
    }
    int chunkLen = count;
    count = 0;
    writeChecksumChunk(buf, 0, chunkLen, keep);
    if (keep) {
      // restore the staged byte count so the data stays buffered
      count = chunkLen;
    }
  }

  /** Generate checksum for the data chunk and output data chunk & checksum
   * to the underlying output stream. If keep is true then keep the
   * current checksum intact, do not reset it.
   */
  private void writeChecksumChunk(byte b[], int off, int len, boolean keep)
  throws IOException {
    int crc = (int)sum.getValue();
    if (!keep) {
      sum.reset();
    }
    int2byte(crc, checksum);
    writeChunk(b, off, len, checksum);
  }

  /**
   * Converts a checksum integer value to a byte stream
   */
  static public byte[] convertToByteStream(Checksum sum, int checksumSize) {
    return int2byte((int)sum.getValue(), new byte[checksumSize]);
  }

  /* Serialize the low 32 bits of the checksum into bytes[0..3],
   * big-endian, and return the array. */
  static byte[] int2byte(int integer, byte[] bytes) {
    for (int i = 0; i < 4; i++) {
      bytes[i] = (byte)(integer >>> (24 - 8 * i));
    }
    return bytes;
  }

  /**
   * Resets existing buffer with a new one of the specified size.
   * Note the checksum scratch buffer keeps its original size.
   */
  protected synchronized void resetChecksumChunk(int size) {
    sum.reset();
    buf = new byte[size];
    count = 0;
  }
}
|
|
@ -0,0 +1,53 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/** An abstract class representing file checksums for files. */
|
||||
public abstract class FileChecksum implements Writable {
|
||||
/** The checksum algorithm name */
|
||||
public abstract String getAlgorithmName();
|
||||
|
||||
/** The length of the checksum in bytes */
|
||||
public abstract int getLength();
|
||||
|
||||
/** The value of the checksum in bytes */
|
||||
public abstract byte[] getBytes();
|
||||
|
||||
/** Return true if both the algorithms and the values are the same. */
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
if (other == null || !(other instanceof FileChecksum)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final FileChecksum that = (FileChecksum)other;
|
||||
return this.getAlgorithmName().equals(that.getAlgorithmName())
|
||||
&& Arrays.equals(this.getBytes(), that.getBytes());
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int hashCode() {
|
||||
return getAlgorithmName().hashCode() ^ Arrays.hashCode(getBytes());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,252 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/** Interface that represents the client side information for a file.
 */
public class FileStatus implements Writable, Comparable {

  private Path path;               // path of the file
  private long length;             // length in bytes
  private boolean isdir;           // true if this status describes a directory
  private short block_replication; // replication factor
  private long blocksize;          // block size in bytes
  private long modification_time;  // ms since the epoch, UTC
  private long access_time;        // ms since the epoch, UTC
  private FsPermission permission;
  private String owner;
  private String group;
  
  /** Default constructor; fields are placeholders until readFields() runs. */
  public FileStatus() { this(0, false, 0, 0, 0, 0, null, null, null, null); }
  
  //We should deprecate this soon?
  public FileStatus(long length, boolean isdir, int block_replication,
                    long blocksize, long modification_time, Path path) {

    this(length, isdir, block_replication, blocksize, modification_time,
         0, null, null, null, path);
  }

  /**
   * Full constructor.  Null permission/owner/group are replaced by
   * defaults so those fields are never null afterwards.
   */
  public FileStatus(long length, boolean isdir, int block_replication,
                    long blocksize, long modification_time, long access_time,
                    FsPermission permission, String owner, String group, 
                    Path path) {
    this.length = length;
    this.isdir = isdir;
    this.block_replication = (short)block_replication;
    this.blocksize = blocksize;
    this.modification_time = modification_time;
    this.access_time = access_time;
    this.permission = (permission == null) ? 
                      FsPermission.getDefault() : permission;
    this.owner = (owner == null) ? "" : owner;
    this.group = (group == null) ? "" : group;
    this.path = path;
  }

  /*
   * @return the length of this file, in bytes
   */
  public long getLen() {
    return length;
  }

  /**
   * Is this a directory?
   * @return true if this is a directory
   */
  public boolean isDir() {
    return isdir;
  }

  /**
   * Get the block size of the file.
   * @return the number of bytes
   */
  public long getBlockSize() {
    return blocksize;
  }

  /**
   * Get the replication factor of a file.
   * @return the replication factor of a file.
   */
  public short getReplication() {
    return block_replication;
  }

  /**
   * Get the modification time of the file.
   * @return the modification time of file in milliseconds since January 1, 1970 UTC.
   */
  public long getModificationTime() {
    return modification_time;
  }

  /**
   * Get the access time of the file.
   * @return the access time of file in milliseconds since January 1, 1970 UTC.
   */
  public long getAccessTime() {
    return access_time;
  }

  /**
   * Get FsPermission associated with the file.
   * @return permission. If a filesystem does not have a notion of permissions
   *         or if permissions could not be determined, then default 
   *         permissions equivalent of "rwxrwxrwx" is returned.
   */
  public FsPermission getPermission() {
    return permission;
  }
  
  /**
   * Get the owner of the file.
   * @return owner of the file. The string could be empty if there is no
   *         notion of owner of a file in a filesystem or if it could not 
   *         be determined (rare).
   */
  public String getOwner() {
    return owner;
  }
  
  /**
   * Get the group associated with the file.
   * @return group for the file. The string could be empty if there is no
   *         notion of group of a file in a filesystem or if it could not 
   *         be determined (rare).
   */
  public String getGroup() {
    return group;
  }

  /** @return the path of the file */
  public Path getPath() {
    return path;
  }

  /* These are provided so that these values could be loaded lazily 
   * by a filesystem (e.g. local file system).
   */
  
  /**
   * Sets permission.
   * @param permission if permission is null, default value is set
   */
  protected void setPermission(FsPermission permission) {
    this.permission = (permission == null) ? 
                      FsPermission.getDefault() : permission;
  }
  
  /**
   * Sets owner.
   * @param owner if it is null, default value is set
   */  
  protected void setOwner(String owner) {
    this.owner = (owner == null) ? "" : owner;
  }
  
  /**
   * Sets group.
   * @param group if it is null, default value is set
   */  
  protected void setGroup(String group) {
    this.group = (group == null) ? "" : group;
  }

  //////////////////////////////////////////////////
  // Writable
  //////////////////////////////////////////////////
  // NOTE: the field order below defines the wire format; write() and
  // readFields() must stay in sync with each other.
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, getPath().toString());
    out.writeLong(length);
    out.writeBoolean(isdir);
    out.writeShort(block_replication);
    out.writeLong(blocksize);
    out.writeLong(modification_time);
    out.writeLong(access_time);
    permission.write(out);
    Text.writeString(out, owner);
    Text.writeString(out, group);
  }

  public void readFields(DataInput in) throws IOException {
    String strPath = Text.readString(in);
    this.path = new Path(strPath);
    this.length = in.readLong();
    this.isdir = in.readBoolean();
    this.block_replication = in.readShort();
    blocksize = in.readLong();
    modification_time = in.readLong();
    access_time = in.readLong();
    // reads into the existing permission object; constructors guarantee
    // that permission is never null
    permission.readFields(in);
    owner = Text.readString(in);
    group = Text.readString(in);
  }

  /**
   * Compare this object to another object
   * 
   * @param   o the object to be compared.
   * @return  a negative integer, zero, or a positive integer as this object
   *   is less than, equal to, or greater than the specified object.
   * 
   * @throws ClassCastException if the specified object's is not of 
   *         type FileStatus
   */
  public int compareTo(Object o) {
    // Ordering, like equality, is by path only.
    FileStatus other = (FileStatus)o;
    return this.getPath().compareTo(other.getPath());
  }
  
  /** Compare if this object is equal to another object
   * @param   o the object to be compared.
   * @return  true if two file status has the same path name; false if not.
   */
  public boolean equals(Object o) {
    if (o == null) {
      return false;
    }
    if (this == o) {
      return true;
    }
    if (!(o instanceof FileStatus)) {
      return false;
    }
    FileStatus other = (FileStatus)o;
    return this.getPath().equals(other.getPath());
  }
  
  /**
   * Returns a hash code value for the object, which is defined as
   * the hash code of the path name.
   *
   * @return  a hash code value for the path name.
   */
  public int hashCode() {
    return getPath().hashCode();
  }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,794 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Enumeration;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
/**
|
||||
* A collection of file-processing util methods
|
||||
*/
|
||||
public class FileUtil {
|
||||
/**
|
||||
* convert an array of FileStatus to an array of Path
|
||||
*
|
||||
* @param stats
|
||||
* an array of FileStatus objects
|
||||
* @return an array of paths corresponding to the input
|
||||
*/
|
||||
public static Path[] stat2Paths(FileStatus[] stats) {
|
||||
if (stats == null)
|
||||
return null;
|
||||
Path[] ret = new Path[stats.length];
|
||||
for (int i = 0; i < stats.length; ++i) {
|
||||
ret[i] = stats[i].getPath();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* convert an array of FileStatus to an array of Path.
|
||||
* If stats if null, return path
|
||||
* @param stats
|
||||
* an array of FileStatus objects
|
||||
* @param path
|
||||
* default path to return in stats is null
|
||||
* @return an array of paths corresponding to the input
|
||||
*/
|
||||
public static Path[] stat2Paths(FileStatus[] stats, Path path) {
|
||||
if (stats == null)
|
||||
return new Path[]{path};
|
||||
else
|
||||
return stat2Paths(stats);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a directory and all its contents. If
|
||||
* we return false, the directory may be partially-deleted.
|
||||
*/
|
||||
public static boolean fullyDelete(File dir) throws IOException {
|
||||
File contents[] = dir.listFiles();
|
||||
if (contents != null) {
|
||||
for (int i = 0; i < contents.length; i++) {
|
||||
if (contents[i].isFile()) {
|
||||
if (!contents[i].delete()) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
//try deleting the directory
|
||||
// this might be a symlink
|
||||
boolean b = false;
|
||||
b = contents[i].delete();
|
||||
if (b){
|
||||
//this was indeed a symlink or an empty directory
|
||||
continue;
|
||||
}
|
||||
// if not an empty directory or symlink let
|
||||
// fullydelete handle it.
|
||||
if (!fullyDelete(contents[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return dir.delete();
|
||||
}
|
||||
|
||||
/**
 * Recursively delete a directory.
 * 
 * @param fs {@link FileSystem} on which the path is present
 * @param dir directory to recursively delete 
 * @throws IOException
 * @deprecated Use {@link FileSystem#delete(Path, boolean)}
 */
@Deprecated
public static void fullyDelete(FileSystem fs, Path dir) 
throws IOException {
  // Delegates to FileSystem.delete with recursive=true.
  fs.delete(dir, true);
}
|
||||
|
||||
//
|
||||
// If the destination is a subdirectory of the source, then
|
||||
// generate exception
|
||||
//
|
||||
private static void checkDependencies(FileSystem srcFS,
|
||||
Path src,
|
||||
FileSystem dstFS,
|
||||
Path dst)
|
||||
throws IOException {
|
||||
if (srcFS == dstFS) {
|
||||
String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
|
||||
String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
|
||||
if (dstq.startsWith(srcq)) {
|
||||
if (srcq.length() == dstq.length()) {
|
||||
throw new IOException("Cannot copy " + src + " to itself.");
|
||||
} else {
|
||||
throw new IOException("Cannot copy " + src + " to its subdirectory " +
|
||||
dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Copy files between FileSystems.
 * Convenience overload that delegates with overwrite=true.
 */
public static boolean copy(FileSystem srcFS, Path src, 
                           FileSystem dstFS, Path dst, 
                           boolean deleteSource,
                           Configuration conf) throws IOException {
  return copy(srcFS, src, dstFS, dst, deleteSource, true, conf);
}
|
||||
|
||||
public static boolean copy(FileSystem srcFS, Path[] srcs,
|
||||
FileSystem dstFS, Path dst,
|
||||
boolean deleteSource,
|
||||
boolean overwrite, Configuration conf)
|
||||
throws IOException {
|
||||
boolean gotException = false;
|
||||
boolean returnVal = true;
|
||||
StringBuffer exceptions = new StringBuffer();
|
||||
|
||||
if (srcs.length == 1)
|
||||
return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf);
|
||||
|
||||
// Check if dest is directory
|
||||
if (!dstFS.exists(dst)) {
|
||||
throw new IOException("`" + dst +"': specified destination directory " +
|
||||
"doest not exist");
|
||||
} else {
|
||||
FileStatus sdst = dstFS.getFileStatus(dst);
|
||||
if (!sdst.isDir())
|
||||
throw new IOException("copying multiple files, but last argument `" +
|
||||
dst + "' is not a directory");
|
||||
}
|
||||
|
||||
for (Path src : srcs) {
|
||||
try {
|
||||
if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf))
|
||||
returnVal = false;
|
||||
} catch (IOException e) {
|
||||
gotException = true;
|
||||
exceptions.append(e.getMessage());
|
||||
exceptions.append("\n");
|
||||
}
|
||||
}
|
||||
if (gotException) {
|
||||
throw new IOException(exceptions.toString());
|
||||
}
|
||||
return returnVal;
|
||||
}
|
||||
|
||||
/** Copy files between FileSystems.
 * Looks up the source's FileStatus and delegates to the private
 * status-based copy.
 */
public static boolean copy(FileSystem srcFS, Path src, 
                           FileSystem dstFS, Path dst, 
                           boolean deleteSource,
                           boolean overwrite,
                           Configuration conf) throws IOException {
  FileStatus fileStatus = srcFS.getFileStatus(src);
  return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf);
}
|
||||
|
||||
/** Copy files between FileSystems.
 * Directories are copied recursively; plain files are streamed byte-wise.
 */
private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
                            FileSystem dstFS, Path dst,
                            boolean deleteSource,
                            boolean overwrite,
                            Configuration conf) throws IOException {
  Path src = srcStatus.getPath();
  // Resolve the real target (may descend into dst when it is a directory)
  // and enforce the overwrite policy.
  dst = checkDest(src.getName(), dstFS, dst, overwrite);
  if (srcStatus.isDir()) {
    // Refuse to copy a directory into itself or one of its children.
    checkDependencies(srcFS, src, dstFS, dst);
    if (!dstFS.mkdirs(dst)) {
      return false;
    }
    FileStatus contents[] = srcFS.listStatus(src);
    for (int i = 0; i < contents.length; i++) {
      copy(srcFS, contents[i], dstFS,
           new Path(dst, contents[i].getPath().getName()),
           deleteSource, overwrite, conf);
    }
  } else {
    InputStream in=null;
    OutputStream out = null;
    try {
      in = srcFS.open(src);
      out = dstFS.create(dst, overwrite);
      IOUtils.copyBytes(in, out, conf, true);
    } catch (IOException e) {
      // On failure close both streams explicitly before rethrowing;
      // on success copyBytes was asked to close them (last arg true).
      IOUtils.closeStream(out);
      IOUtils.closeStream(in);
      throw e;
    }
  }
  if (deleteSource) {
    return srcFS.delete(src, true);
  } else {
    return true;
  }

}
|
||||
|
||||
/** Copy all files in a directory to one output file (merge).
 * Only the immediate children of <code>srcDir</code> that are plain files
 * are concatenated; subdirectories are skipped, not recursed into.  When
 * <code>addString</code> is non-null its UTF-8 bytes are appended after
 * each file's contents.
 */
public static boolean copyMerge(FileSystem srcFS, Path srcDir, 
                                FileSystem dstFS, Path dstFile, 
                                boolean deleteSource,
                                Configuration conf, String addString) throws IOException {
  dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

  // The source must be a directory for a merge to make sense.
  if (!srcFS.getFileStatus(srcDir).isDir())
    return false;
 
  OutputStream out = dstFS.create(dstFile);
  
  try {
    FileStatus contents[] = srcFS.listStatus(srcDir);
    for (int i = 0; i < contents.length; i++) {
      if (!contents[i].isDir()) {
        InputStream in = srcFS.open(contents[i].getPath());
        try {
          // Keep 'out' open across inputs (last arg false).
          IOUtils.copyBytes(in, out, conf, false);
          if (addString!=null)
            out.write(addString.getBytes("UTF-8"));
                
        } finally {
          in.close();
        } 
      }
    }
  } finally {
    out.close();
  }
  

  if (deleteSource) {
    return srcFS.delete(srcDir, true);
  } else {
    return true;
  }
}  
|
||||
|
||||
/** Copy local files to a FileSystem.
 * Directories are copied recursively; a path that is neither a file nor
 * a directory causes an IOException.  Overwriting is not permitted.
 */
public static boolean copy(File src,
                           FileSystem dstFS, Path dst,
                           boolean deleteSource,
                           Configuration conf) throws IOException {
  dst = checkDest(src.getName(), dstFS, dst, false);

  if (src.isDirectory()) {
    if (!dstFS.mkdirs(dst)) {
      return false;
    }
    File contents[] = src.listFiles();
    for (int i = 0; i < contents.length; i++) {
      copy(contents[i], dstFS, new Path(dst, contents[i].getName()),
           deleteSource, conf);
    }
  } else if (src.isFile()) {
    InputStream in = null;
    OutputStream out =null;
    try {
      in = new FileInputStream(src);
      out = dstFS.create(dst);
      // NOTE(review): presumably copyBytes(in, out, conf) closes both
      // streams on success -- confirm against IOUtils; the catch below
      // only covers the failure path.
      IOUtils.copyBytes(in, out, conf);
    } catch (IOException e) {
      IOUtils.closeStream( out );
      IOUtils.closeStream( in );
      throw e;
    }
  } else {
    throw new IOException(src.toString() + 
                          ": No such file or directory");
  }
  if (deleteSource) {
    return FileUtil.fullyDelete(src);
  } else {
    return true;
  }
}
|
||||
|
||||
/** Copy FileSystem files to local files.
 * Looks up the source's FileStatus and delegates to the private
 * status-based copy.
 */
public static boolean copy(FileSystem srcFS, Path src, 
                           File dst, boolean deleteSource,
                           Configuration conf) throws IOException {
  FileStatus filestatus = srcFS.getFileStatus(src);
  return copy(srcFS, filestatus, dst, deleteSource, conf);
}
|
||||
|
||||
/** Copy FileSystem files to local files.
 * Directories are copied recursively into local directories.
 */
private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
                            File dst, boolean deleteSource,
                            Configuration conf) throws IOException {
  Path src = srcStatus.getPath();
  if (srcStatus.isDir()) {
    if (!dst.mkdirs()) {
      return false;
    }
    FileStatus contents[] = srcFS.listStatus(src);
    for (int i = 0; i < contents.length; i++) {
      copy(srcFS, contents[i],
           new File(dst, contents[i].getPath().getName()),
           deleteSource, conf);
    }
  } else {
    InputStream in = srcFS.open(src);
    // NOTE(review): the FileOutputStream is handed straight to copyBytes;
    // presumably copyBytes(in, out, conf) closes both streams -- confirm
    // against IOUtils, otherwise this leaks on failure.
    IOUtils.copyBytes(in, new FileOutputStream(dst), conf);
  }
  if (deleteSource) {
    return srcFS.delete(src, true);
  } else {
    return true;
  }
}
|
||||
|
||||
private static Path checkDest(String srcName, FileSystem dstFS, Path dst,
|
||||
boolean overwrite) throws IOException {
|
||||
if (dstFS.exists(dst)) {
|
||||
FileStatus sdst = dstFS.getFileStatus(dst);
|
||||
if (sdst.isDir()) {
|
||||
if (null == srcName) {
|
||||
throw new IOException("Target " + dst + " is a directory");
|
||||
}
|
||||
return checkDest(null, dstFS, new Path(dst, srcName), overwrite);
|
||||
} else if (!overwrite) {
|
||||
throw new IOException("Target " + dst + " already exists");
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is only used on windows to invoke the cygpath command.
|
||||
*/
|
||||
private static class CygPathCommand extends Shell {
|
||||
String[] command;
|
||||
String result;
|
||||
CygPathCommand(String path) throws IOException {
|
||||
command = new String[]{"cygpath", "-u", path};
|
||||
run();
|
||||
}
|
||||
String getResult() throws IOException {
|
||||
return result;
|
||||
}
|
||||
protected String[] getExecString() {
|
||||
return command;
|
||||
}
|
||||
protected void parseExecResult(BufferedReader lines) throws IOException {
|
||||
String line = lines.readLine();
|
||||
if (line == null) {
|
||||
throw new IOException("Can't convert '" + command[2] +
|
||||
" to a cygwin path");
|
||||
}
|
||||
result = line;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a os-native filename to a path that works for the shell.
|
||||
* @param filename The filename to convert
|
||||
* @return The unix pathname
|
||||
* @throws IOException on windows, there can be problems with the subprocess
|
||||
*/
|
||||
public static String makeShellPath(String filename) throws IOException {
|
||||
if (Path.WINDOWS) {
|
||||
return new CygPathCommand(filename).getResult();
|
||||
} else {
|
||||
return filename;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a os-native filename to a path that works for the shell.
|
||||
* @param file The filename to convert
|
||||
* @return The unix pathname
|
||||
* @throws IOException on windows, there can be problems with the subprocess
|
||||
*/
|
||||
public static String makeShellPath(File file) throws IOException {
|
||||
return makeShellPath(file, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a os-native filename to a path that works for the shell.
|
||||
* @param file The filename to convert
|
||||
* @param makeCanonicalPath
|
||||
* Whether to make canonical path for the file passed
|
||||
* @return The unix pathname
|
||||
* @throws IOException on windows, there can be problems with the subprocess
|
||||
*/
|
||||
public static String makeShellPath(File file, boolean makeCanonicalPath)
|
||||
throws IOException {
|
||||
if (makeCanonicalPath) {
|
||||
return makeShellPath(file.getCanonicalPath());
|
||||
} else {
|
||||
return makeShellPath(file.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes an input dir and returns the du on that local directory. Very basic
|
||||
* implementation.
|
||||
*
|
||||
* @param dir
|
||||
* The input dir to get the disk space of this local dir
|
||||
* @return The total disk space of the input local directory
|
||||
*/
|
||||
public static long getDU(File dir) {
|
||||
long size = 0;
|
||||
if (!dir.exists())
|
||||
return 0;
|
||||
if (!dir.isDirectory()) {
|
||||
return dir.length();
|
||||
} else {
|
||||
size = dir.length();
|
||||
File[] allFiles = dir.listFiles();
|
||||
for (int i = 0; i < allFiles.length; i++) {
|
||||
size = size + getDU(allFiles[i]);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
  /**
   * Given a File input it will unzip the file in a the unzip directory
   * passed as the second parameter.
   *
   * Only regular entries are extracted; directory entries are skipped and
   * parent directories are created on demand from each entry's name.
   *
   * @param inFile The zip file as input
   * @param unzipDir The unzip directory where to unzip the zip file.
   * @throws IOException if the archive cannot be read, a parent directory
   *         cannot be created, or a write fails
   */
  public static void unZip(File inFile, File unzipDir) throws IOException {
    Enumeration<? extends ZipEntry> entries;
    ZipFile zipFile = new ZipFile(inFile);

    try {
      entries = zipFile.entries();
      while (entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        if (!entry.isDirectory()) {
          // NOTE(review): entry.getName() is used unvalidated — an archive
          // containing "../" components could escape unzipDir (zip-slip).
          // Confirm callers only unzip trusted archives.
          InputStream in = zipFile.getInputStream(entry);
          try {
            File file = new File(unzipDir, entry.getName());
            // mkdirs() returns false both on failure and when the directory
            // already exists, so only fail if it is still not a directory.
            if (!file.getParentFile().mkdirs()) {
              if (!file.getParentFile().isDirectory()) {
                throw new IOException("Mkdirs failed to create " +
                                      file.getParentFile().toString());
              }
            }
            OutputStream out = new FileOutputStream(file);
            try {
              // Manual 8KB buffered copy from the archive entry to disk.
              byte[] buffer = new byte[8192];
              int i;
              while ((i = in.read(buffer)) != -1) {
                out.write(buffer, 0, i);
              }
            } finally {
              out.close();
            }
          } finally {
            in.close();
          }
        }
      }
    } finally {
      // Closing the ZipFile also invalidates any entry streams.
      zipFile.close();
    }
  }
|
||||
|
||||
  /**
   * Given a Tar File as input it will untar the file in a the untar directory
   * passed as the second parameter
   *
   * This utility will untar ".tar" files and ".tar.gz","tgz" files.
   *
   * Extraction is delegated to an external shell pipeline
   * ("gzip -dc ... | tar -xf -" or "tar -xf ..."), so it requires bash,
   * tar and (for compressed input) gzip on the PATH.
   *
   * @param inFile The tar file as input.
   * @param untarDir The untar directory where to untar the tar file.
   * @throws IOException if the untar directory cannot be created or the
   *         tar process exits with a non-zero status
   */
  public static void unTar(File inFile, File untarDir) throws IOException {
    // mkdirs() returns false when the directory already exists, so only
    // treat it as an error if untarDir is still not a directory.
    if (!untarDir.mkdirs()) {
      if (!untarDir.isDirectory()) {
        throw new IOException("Mkdirs failed to create " + untarDir);
      }
    }

    StringBuffer untarCommand = new StringBuffer();
    boolean gzipped = inFile.toString().endsWith("gz");
    if (gzipped) {
      // Decompress to stdout and pipe into a subshell that runs tar.
      untarCommand.append(" gzip -dc '");
      untarCommand.append(FileUtil.makeShellPath(inFile));
      untarCommand.append("' | (");
    }
    untarCommand.append("cd '");
    untarCommand.append(FileUtil.makeShellPath(untarDir));
    untarCommand.append("' ; ");
    untarCommand.append("tar -xf ");

    if (gzipped) {
      // Read the archive from stdin (the gzip pipe) and close the subshell.
      untarCommand.append(" -)");
    } else {
      // NOTE(review): this path is appended unquoted, and the quoted paths
      // above are only wrapped in single quotes — file names containing
      // quotes or shell metacharacters would break or inject into the
      // command line. Confirm inputs are trusted.
      untarCommand.append(FileUtil.makeShellPath(inFile));
    }
    String[] shellCmd = { "bash", "-c", untarCommand.toString() };
    ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
    shexec.execute();
    int exitcode = shexec.getExitCode();
    if (exitcode != 0) {
      throw new IOException("Error untarring file " + inFile +
                  ". Tar process exited with exit code " + exitcode);
    }
  }
|
||||
|
||||
  /**
   * Class for creating hardlinks.
   * Supports Unix, Cygwin, WindXP.
   *
   * The OS is detected once in a static initializer and the appropriate
   * external commands ("ln"/"fsutil" for linking, "stat"/"ls" for link
   * counting) are selected for the lifetime of the JVM.
   *
   * NOTE(review): the command templates below are shared static arrays that
   * createHardLink() mutates in place — concurrent calls could interleave
   * arguments. Confirm callers are single-threaded or externally serialized.
   */
  public static class HardLink {
    enum OSType {
      OS_TYPE_UNIX,
      OS_TYPE_WINXP,
      OS_TYPE_SOLARIS,
      OS_TYPE_MAC;
    }

    // Template argv for creating a link; trailing null slots are filled in
    // with the target/link paths by createHardLink().
    private static String[] hardLinkCommand;
    // Template argv for querying a file's link count; the file name is
    // appended by getLinkCount().
    private static String[] getLinkCountCommand;
    private static OSType osType;

    static {
      osType = getOSType();
      switch(osType) {
      case OS_TYPE_WINXP:
        // fsutil takes link-name before target, hence the two null slots.
        hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null};
        getLinkCountCommand = new String[] {"stat","-c%h"};
        break;
      case OS_TYPE_SOLARIS:
        hardLinkCommand = new String[] {"ln", null, null};
        // Solaris stat lacks -c; parse the link count out of "ls -l" instead.
        getLinkCountCommand = new String[] {"ls","-l"};
        break;
      case OS_TYPE_MAC:
        hardLinkCommand = new String[] {"ln", null, null};
        // BSD stat uses -f for its format string.
        getLinkCountCommand = new String[] {"stat","-f%l"};
        break;
      case OS_TYPE_UNIX:
      default:
        hardLinkCommand = new String[] {"ln", null, null};
        getLinkCountCommand = new String[] {"stat","-c%h"};
      }
    }

    // Classify the host OS from the os.name system property.
    static private OSType getOSType() {
      String osName = System.getProperty("os.name");
      if (osName.indexOf("Windows") >= 0 &&
          (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0 || osName.indexOf("Vista") >= 0))
        return OSType.OS_TYPE_WINXP;
      else if (osName.indexOf("SunOS") >= 0)
        return OSType.OS_TYPE_SOLARIS;
      else if (osName.indexOf("Mac") >= 0)
        return OSType.OS_TYPE_MAC;
      else
        return OSType.OS_TYPE_UNIX;
    }

    /**
     * Creates a hardlink from linkName to target by invoking the
     * OS-specific link command.
     *
     * @throws IOException if the process exits non-zero or is interrupted;
     *         the message combines the first line of its stdout and stderr
     */
    public static void createHardLink(File target,
                                      File linkName) throws IOException {
      int len = hardLinkCommand.length;
      if (osType == OSType.OS_TYPE_WINXP) {
        // fsutil hardlink create <new-link> <existing-target>
        hardLinkCommand[len-1] = target.getCanonicalPath();
        hardLinkCommand[len-2] = linkName.getCanonicalPath();
      } else {
        // ln <existing-target> <new-link>, with canonical shell-safe paths.
        hardLinkCommand[len-2] = makeShellPath(target, true);
        hardLinkCommand[len-1] = makeShellPath(linkName, true);
      }
      // execute shell command
      Process process = Runtime.getRuntime().exec(hardLinkCommand);
      try {
        if (process.waitFor() != 0) {
          String errMsg = new BufferedReader(new InputStreamReader(
              process.getInputStream())).readLine();
          if (errMsg == null)  errMsg = "";
          String inpMsg = new BufferedReader(new InputStreamReader(
              process.getErrorStream())).readLine();
          if (inpMsg == null)  inpMsg = "";
          throw new IOException(errMsg + inpMsg);
        }
      } catch (InterruptedException e) {
        throw new IOException(StringUtils.stringifyException(e));
      } finally {
        // Reclaim the child's streams/handles even on the error paths.
        process.destroy();
      }
    }

    /**
     * Retrieves the number of links to the specified file.
     *
     * Runs the OS-specific link-count command and parses its first line of
     * output; on Solaris the count is the second whitespace-separated field
     * of "ls -l" output, elsewhere the whole line is the number.
     *
     * @throws IOException if the process fails, is interrupted, or its
     *         output cannot be parsed as a number
     */
    public static int getLinkCount(File fileName) throws IOException {
      int len = getLinkCountCommand.length;
      // Build argv = template + the file to inspect.
      String[] cmd = new String[len + 1];
      for (int i = 0; i < len; i++) {
        cmd[i] = getLinkCountCommand[i];
      }
      cmd[len] = fileName.toString();
      String inpMsg = "";
      String errMsg = "";
      int exitValue = -1;
      BufferedReader in = null;
      BufferedReader err = null;

      // execute shell command
      Process process = Runtime.getRuntime().exec(cmd);
      try {
        exitValue = process.waitFor();
        in = new BufferedReader(new InputStreamReader(
                                    process.getInputStream()));
        inpMsg = in.readLine();
        if (inpMsg == null)  inpMsg = "";

        err = new BufferedReader(new InputStreamReader(
                                     process.getErrorStream()));
        errMsg = err.readLine();
        if (errMsg == null)  errMsg = "";
        if (exitValue != 0) {
          throw new IOException(inpMsg + errMsg);
        }
        if (getOSType() == OSType.OS_TYPE_SOLARIS) {
          String[] result = inpMsg.split("\\s+");
          return Integer.parseInt(result[1]);
        } else {
          return Integer.parseInt(inpMsg);
        }
      } catch (NumberFormatException e) {
        // Include both output streams to aid diagnosing a garbled count.
        throw new IOException(StringUtils.stringifyException(e) +
                              inpMsg + errMsg +
                              " on file:" + fileName);
      } catch (InterruptedException e) {
        throw new IOException(StringUtils.stringifyException(e) +
                              inpMsg + errMsg +
                              " on file:" + fileName);
      } finally {
        process.destroy();
        if (in != null) in.close();
        if (err != null) err.close();
      }
    }
  }
|
||||
|
||||
/**
|
||||
* Create a soft link between a src and destination
|
||||
* only on a local disk. HDFS does not support this
|
||||
* @param target the target for symlink
|
||||
* @param linkname the symlink
|
||||
* @return value returned by the command
|
||||
*/
|
||||
public static int symLink(String target, String linkname) throws IOException{
|
||||
String cmd = "ln -s " + target + " " + linkname;
|
||||
Process p = Runtime.getRuntime().exec(cmd, null);
|
||||
int returnVal = -1;
|
||||
try{
|
||||
returnVal = p.waitFor();
|
||||
} catch(InterruptedException e){
|
||||
//do nothing as of yet
|
||||
}
|
||||
return returnVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the permissions on a filename.
|
||||
* @param filename the name of the file to change
|
||||
* @param perm the permission string
|
||||
* @return the exit code from the command
|
||||
* @throws IOException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public static int chmod(String filename, String perm
|
||||
) throws IOException, InterruptedException {
|
||||
return chmod(filename, perm, false);
|
||||
}
|
||||
|
||||
  /**
   * Change the permissions on a file / directory, recursively, if
   * needed.
   *
   * Shells out to "bash -c chmod ...". Failures to execute are logged at
   * debug level and otherwise swallowed; the shell's exit code is always
   * returned so callers can detect failure themselves.
   *
   * @param filename name of the file whose permissions are to change
   * @param perm permission string
   * @param recursive true, if permissions should be changed recursively
   * @return the exit code from the command.
   * @throws IOException
   * @throws InterruptedException
   */
  public static int chmod(String filename, String perm, boolean recursive)
                            throws IOException, InterruptedException {
    StringBuffer cmdBuf = new StringBuffer();
    cmdBuf.append("chmod ");
    if (recursive) {
      cmdBuf.append("-R ");
    }
    // NOTE(review): filename is interpolated unquoted into a bash command
    // line — names with spaces or shell metacharacters will misbehave.
    // Confirm inputs are trusted.
    cmdBuf.append(perm).append(" ");
    cmdBuf.append(filename);
    String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()};
    ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd);
    try {
      shExec.execute();
    }catch(Exception e) {
      // Intentionally best-effort: log and fall through to return the
      // exit code rather than propagating the exception.
      if(Log.isDebugEnabled()) {
        Log.debug("Error while changing permission : " + filename
            +" Exception: " + StringUtils.stringifyException(e));
      }
    }
    return shExec.getExitCode();
  }
|
||||
|
||||
/**
|
||||
* Create a tmp file for a base file.
|
||||
* @param basefile the base file of the tmp
|
||||
* @param prefix file name prefix of tmp
|
||||
* @param isDeleteOnExit if true, the tmp will be deleted when the VM exits
|
||||
* @return a newly created tmp file
|
||||
* @exception IOException If a tmp file cannot created
|
||||
* @see java.io.File#createTempFile(String, String, File)
|
||||
* @see java.io.File#deleteOnExit()
|
||||
*/
|
||||
public static final File createLocalTempFile(final File basefile,
|
||||
final String prefix,
|
||||
final boolean isDeleteOnExit)
|
||||
throws IOException {
|
||||
File tmp = File.createTempFile(prefix + basefile.getName(),
|
||||
"", basefile.getParentFile());
|
||||
if (isDeleteOnExit) {
|
||||
tmp.deleteOnExit();
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
  /**
   * Move the src file to the name specified by target.
   *
   * If the first rename fails (typical on Windows when the target exists
   * or is open), the target is repeatedly deleted — sleeping 1s between
   * attempts, for up to 6 tries — and the rename is retried once more.
   *
   * @param src the source file
   * @param target the target file
   * @exception IOException If this operation fails
   */
  public static void replaceFile(File src, File target) throws IOException {
    /* renameTo() has two limitations on Windows platform.
     * src.renameTo(target) fails if
     * 1) If target already exists OR
     * 2) If target is already open for reading/writing.
     */
    if (!src.renameTo(target)) {
      int retries = 5;
      // Post-decrement with >= 0 means up to 6 delete attempts in total.
      while (target.exists() && !target.delete() && retries-- >= 0) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          throw new IOException("replaceFile interrupted.");
        }
      }
      // One final rename attempt after (hopefully) clearing the target.
      if (!src.renameTo(target)) {
        throw new IOException("Unable to rename " + src +
                              " to " + target);
      }
    }
  }
|
||||
}
|
|
@ -0,0 +1,278 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/****************************************************************
 * A <code>FilterFileSystem</code> contains
 * some other file system, which it uses as
 * its basic file system, possibly transforming
 * the data along the way or providing additional
 * functionality. The class <code>FilterFileSystem</code>
 * itself simply overrides all methods of
 * <code>FileSystem</code> with versions that
 * pass all requests to the contained file
 * system. Subclasses of <code>FilterFileSystem</code>
 * may further override some of these methods
 * and may also provide additional methods
 * and fields.
 *
 *****************************************************************/
public class FilterFileSystem extends FileSystem {

  // The wrapped file system every call is forwarded to.
  protected FileSystem fs;

  /*
   * so that extending classes can define it
   */
  public FilterFileSystem() {
  }

  // Wrap an existing file system; statistics are shared with the wrapped
  // instance so that operations on either are counted together.
  public FilterFileSystem(FileSystem fs) {
    this.fs = fs;
    this.statistics = fs.statistics;
  }

  /** Called after a new FileSystem instance is constructed.
   * @param name a uri whose authority section names the host, port, etc.
   *   for this FileSystem
   * @param conf the configuration
   */
  public void initialize(URI name, Configuration conf) throws IOException {
    // Forwarded directly to the wrapped file system (no super.initialize).
    fs.initialize(name, conf);
  }

  /** Returns a URI whose scheme and authority identify this FileSystem.*/
  public URI getUri() {
    return fs.getUri();
  }

  /** Make sure that a path specifies a FileSystem. */
  public Path makeQualified(Path path) {
    return fs.makeQualified(path);
  }

  ///////////////////////////////////////////////////////////////
  // FileSystem
  ///////////////////////////////////////////////////////////////

  /** Check that a Path belongs to this FileSystem. */
  protected void checkPath(Path path) {
    fs.checkPath(path);
  }

  /** Delegates block-location lookup to the wrapped file system. */
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
    long len) throws IOException {
      return fs.getFileBlockLocations(file, start, len);
  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * @param f the file name to open
   * @param bufferSize the size of the buffer to be used.
   */
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    return fs.open(f, bufferSize);
  }

  /** {@inheritDoc} */
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException {
    return fs.append(f, bufferSize, progress);
  }

  /** {@inheritDoc} */
  @Override
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    return fs.create(f, permission,
        overwrite, bufferSize, replication, blockSize, progress);
  }

  /**
   * Set replication for an existing file.
   *
   * @param src file name
   * @param replication new replication
   * @throws IOException
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  public boolean setReplication(Path src, short replication) throws IOException {
    return fs.setReplication(src, replication);
  }

  /**
   * Renames Path src to Path dst.  Can take place on local fs
   * or remote DFS.
   */
  public boolean rename(Path src, Path dst) throws IOException {
    return fs.rename(src, dst);
  }

  /** Delete a file */
  public boolean delete(Path f, boolean recursive) throws IOException {
    return fs.delete(f, recursive);
  }

  /** List files in a directory. */
  public FileStatus[] listStatus(Path f) throws IOException {
    return fs.listStatus(f);
  }

  /** Returns the wrapped file system's home directory. */
  public Path getHomeDirectory() {
    return fs.getHomeDirectory();
  }


  /**
   * Set the current working directory for the given file system. All relative
   * paths will be resolved relative to it.
   *
   * @param newDir
   */
  public void setWorkingDirectory(Path newDir) {
    fs.setWorkingDirectory(newDir);
  }

  /**
   * Get the current working directory for the given file system
   *
   * @return the directory pathname
   */
  public Path getWorkingDirectory() {
    return fs.getWorkingDirectory();
  }

  /** {@inheritDoc} */
  @Override
  public FsStatus getStatus(Path p) throws IOException {
    return fs.getStatus(p);
  }

  /** {@inheritDoc} */
  @Override
  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
    return fs.mkdirs(f, permission);
  }

  /**
   * The src file is on the local disk.  Add it to FS at
   * the given dst name.
   * delSrc indicates if the source should be removed
   */
  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    fs.copyFromLocalFile(delSrc, src, dst);
  }

  /**
   * The src file is under FS, and the dst is on the local disk.
   * Copy it from FS control to the local dst name.
   * delSrc indicates if the src will be removed or not.
   */
  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    fs.copyToLocalFile(delSrc, src, dst);
  }

  /**
   * Returns a local File that the user can write output to.  The caller
   * provides both the eventual FS target name and the local working
   * file.  If the FS is local, we write directly into the target.  If
   * the FS is remote, we write into the tmp local area.
   */
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    return fs.startLocalOutput(fsOutputFile, tmpLocalFile);
  }

  /**
   * Called when we're all done writing to the target.  A local FS will
   * do nothing, because we've written to exactly the right place.  A remote
   * FS will copy the contents of tmpLocalFile to the correct target at
   * fsOutputFile.
   */
  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    fs.completeLocalOutput(fsOutputFile, tmpLocalFile);
  }

  /** Return the number of bytes that large input files should be optimally
   * be split into to minimize i/o time. */
  public long getDefaultBlockSize() {
    return fs.getDefaultBlockSize();
  }

  /**
   * Get the default replication.
   */
  public short getDefaultReplication() {
    return fs.getDefaultReplication();
  }

  /**
   * Get file status.
   */
  public FileStatus getFileStatus(Path f) throws IOException {
    return fs.getFileStatus(f);
  }

  /** {@inheritDoc} */
  public FileChecksum getFileChecksum(Path f) throws IOException {
    return fs.getFileChecksum(f);
  }

  /** {@inheritDoc} */
  public void setVerifyChecksum(boolean verifyChecksum) {
    fs.setVerifyChecksum(verifyChecksum);
  }

  @Override
  public Configuration getConf() {
    return fs.getConf();
  }

  // Closes this filter (via super) and then the wrapped file system.
  @Override
  public void close() throws IOException {
    super.close();
    fs.close();
  }

  /** {@inheritDoc} */
  @Override
  public void setOwner(Path p, String username, String groupname
      ) throws IOException {
    fs.setOwner(p, username, groupname);
  }

  /** {@inheritDoc} */
  @Override
  public void setPermission(Path p, FsPermission permission
      ) throws IOException {
    fs.setPermission(p, permission);
  }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,315 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.fs.FsShell.CmdHandler;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
|
||||
|
||||
/**
|
||||
* This class is the home for file permissions related commands.
|
||||
* Moved to this separate class since FsShell is getting too large.
|
||||
*/
|
||||
class FsShellPermissions {
|
||||
|
||||
  /*========== chmod ==========*/

  /*
   * The pattern is almost as flexible as mode allowed by chmod shell command.
   * The main restriction is that we recognize only rwxXt. To reduce errors we
   * also enforce octal mode specifications of either 3 digits without a sticky
   * bit setting or four digits with a sticky bit setting.
   */

  // Symbolic mode, matched repeatedly via \G anchoring:
  // group 1 = who ([ugoa]*), 2 = op ([+=-]+), 3 = perms ([rwxXt]+),
  // group 4 = separator between comma-separated clauses.
  private static Pattern chmodNormalPattern =
             Pattern.compile("\\G\\s*([ugoa]*)([+=-]+)([rwxXt]+)([,\\s]*)\\s*");
  // Octal mode: group 1 = optional sticky digit (0/1), group 2 = three
  // octal permission digits.
  private static Pattern chmodOctalPattern =
            Pattern.compile("^\\s*[+]?([01]?)([0-7]{3})\\s*$");

  // Usage string displayed by FsShell for the chmod command.
  static String CHMOD_USAGE =
    "-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...";
||||
  /**
   * Applies a chmod mode string (symbolic or octal) to files visited by
   * FsShell. The mode is parsed once in the constructor into per-class
   * (user/group/others/sticky) mode bits plus an operation type
   * ('+' add, '-' remove, '=' set), and run() applies them to each file.
   */
  private static class ChmodHandler extends CmdHandler {

    // Parsed 3-bit rwx values for each permission class.
    private short userMode;
    private short groupMode;
    private short othersMode;
    // 1 if the sticky bit was requested, else 0.
    private short stickyMode;
    // Operation per class: '+' (default, no-op with mode 0), '-' or '='.
    private char userType = '+';
    private char groupType = '+';
    private char othersType = '+';
    private char stickyBitType = '+';

    // Parse a symbolic mode string such as "u+rwx,go-w"; matcher has
    // already matched the first clause.
    private void applyNormalPattern(String modeStr, Matcher matcher)
                                    throws IOException {
      // Are there multiple permissions stored in one chmod?
      boolean commaSeperated = false;

      for(int i=0; i < 1 || matcher.end() < modeStr.length(); i++) {
        // After the first clause, a further clause is only legal if the
        // previous one ended with a comma and the pattern matches again.
        if (i>0 && (!commaSeperated || !matcher.find())) {
          patternError(modeStr);
        }

        /* groups : 1 : [ugoa]*
         *          2 : [+-=]
         *          3 : [rwxXt]+
         *          4 : [,\s]*
         */

        String str = matcher.group(2);
        char type = str.charAt(str.length() - 1);

        boolean user, group, others, stickyBit;
        user = group = others = stickyBit = false;

        for(char c : matcher.group(1).toCharArray()) {
          switch (c) {
          case 'u' : user = true; break;
          case 'g' : group = true; break;
          case 'o' : others = true; break;
          case 'a' : break;
          default  : throw new RuntimeException("Unexpected");
          }
        }

        if (!(user || group || others)) { // same as specifying 'a'
          user = group = others = true;
        }

        // Accumulate the requested rwx bits; 8 is a marker for capital X
        // (conditional execute), resolved later in applyChmod().
        short  mode = 0;

        for(char c : matcher.group(3).toCharArray()) {
          switch (c) {
          case 'r' : mode |= 4; break;
          case 'w' : mode |= 2; break;
          case 'x' : mode |= 1; break;
          case 'X' : mode |= 8; break;
          case 't' : stickyBit = true; break;
          default  : throw new RuntimeException("Unexpected");
          }
        }

        if ( user ) {
          userMode = mode;
          userType = type;
        }

        if ( group ) {
          groupMode = mode;
          groupType = type;
        }

        if ( others ) {
          othersMode = mode;
          othersType = type;

          // The sticky bit rides along with the "others" clause.
          stickyMode = (short) (stickyBit ? 1 : 0);
          stickyBitType = type;
        }

        commaSeperated = matcher.group(4).contains(",");
      }
    }

    // Parse an octal mode such as "644" or "1755"; '=' semantics apply.
    private void applyOctalPattern(String modeStr, Matcher matcher) {
      userType = groupType = othersType = '=';

      // Check if sticky bit is specified
      String sb = matcher.group(1);
      if(!sb.isEmpty()) {
        stickyMode = Short.valueOf(sb.substring(0, 1));
        stickyBitType = '=';
      }

      String str = matcher.group(2);
      userMode = Short.valueOf(str.substring(0, 1));
      groupMode = Short.valueOf(str.substring(1, 2));
      othersMode = Short.valueOf(str.substring(2, 3));
    }

    // Report an unparseable mode string.
    private void patternError(String mode) throws IOException {
      throw new IOException("chmod : mode '" + mode +
                            "' does not match the expected pattern.");
    }

    ChmodHandler(FileSystem fs, String modeStr) throws IOException {
      super("chmod", fs);
      Matcher matcher = null;

      // Try symbolic form first, then octal; otherwise reject.
      if ((matcher = chmodNormalPattern.matcher(modeStr)).find()) {
        applyNormalPattern(modeStr, matcher);
      } else if ((matcher = chmodOctalPattern.matcher(modeStr)).matches()) {
        applyOctalPattern(modeStr, matcher);
      } else {
        patternError(modeStr);
      }
    }

    // Combine a requested 3-bit mode with the existing bits according to
    // the operation type; exeOk tells whether capital X should grant 'x'.
    private int applyChmod(char type, int mode, int existing, boolean exeOk) {
      boolean capX = false;

      if ((mode&8) != 0) { // convert X to x;
        capX = true;
        mode &= ~8;
        mode |= 1;
      }

      switch (type) {
      case '+' : mode = mode | existing; break;
      case '-' : mode = (~mode) & existing; break;
      case '=' : break;
      default  : throw new RuntimeException("Unexpected");
      }

      // if X is specified add 'x' only if exeOk or x was already set.
      if (capX && !exeOk && (mode&1) != 0 && (existing&1) == 0) {
        mode &= ~1; // remove x
      }

      return mode;
    }

    @Override
    public void run(FileStatus file, FileSystem srcFs) throws IOException {
      FsPermission perms = file.getPermission();
      int existing = perms.toShort();
      // Directories and already-executable files satisfy capital X.
      boolean exeOk = file.isDir() || (existing & 0111) != 0;
      // Rebuild the 12-bit permission word: sticky(9) | user(6) |
      // group(3) | others(0).
      int newperms = ( applyChmod(stickyBitType, stickyMode,
                                  (existing>>>9), false) << 9 |
                       applyChmod(userType, userMode,
                                  (existing>>>6)&7, exeOk) << 6 |
                       applyChmod(groupType, groupMode,
                                  (existing>>>3)&7, exeOk) << 3 |
                       applyChmod(othersType, othersMode, existing&7, exeOk));

      // Only issue a setPermission RPC when the bits actually change.
      if (existing != newperms) {
        try {
          srcFs.setPermission(file.getPath(),
                              new FsPermission((short)newperms));
        } catch (IOException e) {
          System.err.println(getName() + ": changing permissions of '" +
                             file.getPath() + "':" + e.getMessage());
        }
      }
    }
  }
|
||||
|
||||
/*========== chown ==========*/
|
||||
|
||||
// Character class of characters permitted in owner and group names.
static private String allowedChars = "[-_./@a-zA-Z0-9]";
///allows only "allowedChars" above in names for owner and group
// Matches "[owner][:[group]]"; group(1) = owner, group(3) = group.
static private Pattern chownPattern =
       Pattern.compile("^\\s*(" + allowedChars + "+)?" +
                       "([:](" + allowedChars + "*))?\\s*$");
// Matches a bare group name; group(1) = group.
static private Pattern chgrpPattern =
       Pattern.compile("^\\s*(" + allowedChars + "+)\\s*$");

// Usage strings shown in the shell's help output.
static String CHOWN_USAGE = "-chown [-R] [OWNER][:[GROUP]] PATH...";
static String CHGRP_USAGE = "-chgrp [-R] GROUP PATH...";
|
||||
|
||||
private static class ChownHandler extends CmdHandler {
|
||||
protected String owner = null;
|
||||
protected String group = null;
|
||||
|
||||
protected ChownHandler(String cmd, FileSystem fs) { //for chgrp
|
||||
super(cmd, fs);
|
||||
}
|
||||
|
||||
ChownHandler(FileSystem fs, String ownerStr) throws IOException {
|
||||
super("chown", fs);
|
||||
Matcher matcher = chownPattern.matcher(ownerStr);
|
||||
if (!matcher.matches()) {
|
||||
throw new IOException("'" + ownerStr + "' does not match " +
|
||||
"expected pattern for [owner][:group].");
|
||||
}
|
||||
owner = matcher.group(1);
|
||||
group = matcher.group(3);
|
||||
if (group != null && group.length() == 0) {
|
||||
group = null;
|
||||
}
|
||||
if (owner == null && group == null) {
|
||||
throw new IOException("'" + ownerStr + "' does not specify " +
|
||||
" onwer or group.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(FileStatus file, FileSystem srcFs) throws IOException {
|
||||
//Should we do case insensitive match?
|
||||
String newOwner = (owner == null || owner.equals(file.getOwner())) ?
|
||||
null : owner;
|
||||
String newGroup = (group == null || group.equals(file.getGroup())) ?
|
||||
null : group;
|
||||
|
||||
if (newOwner != null || newGroup != null) {
|
||||
try {
|
||||
srcFs.setOwner(file.getPath(), newOwner, newGroup);
|
||||
} catch (IOException e) {
|
||||
System.err.println(getName() + ": changing ownership of '" +
|
||||
file.getPath() + "':" + e.getMessage());
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*========== chgrp ==========*/
|
||||
|
||||
private static class ChgrpHandler extends ChownHandler {
|
||||
ChgrpHandler(FileSystem fs, String groupStr) throws IOException {
|
||||
super("chgrp", fs);
|
||||
|
||||
Matcher matcher = chgrpPattern.matcher(groupStr);
|
||||
if (!matcher.matches()) {
|
||||
throw new IOException("'" + groupStr + "' does not match " +
|
||||
"expected pattern for group");
|
||||
}
|
||||
group = matcher.group(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void changePermissions(FileSystem fs, String cmd,
|
||||
String argv[], int startIndex, FsShell shell)
|
||||
throws IOException {
|
||||
CmdHandler handler = null;
|
||||
boolean recursive = false;
|
||||
|
||||
// handle common arguments, currently only "-R"
|
||||
for (; startIndex < argv.length && argv[startIndex].equals("-R");
|
||||
startIndex++) {
|
||||
recursive = true;
|
||||
}
|
||||
|
||||
if ( startIndex >= argv.length ) {
|
||||
throw new IOException("Not enough arguments for the command");
|
||||
}
|
||||
|
||||
if (cmd.equals("-chmod")) {
|
||||
handler = new ChmodHandler(fs, argv[startIndex++]);
|
||||
} else if (cmd.equals("-chown")) {
|
||||
handler = new ChownHandler(fs, argv[startIndex++]);
|
||||
} else if (cmd.equals("-chgrp")) {
|
||||
handler = new ChgrpHandler(fs, argv[startIndex++]);
|
||||
}
|
||||
|
||||
shell.runCmdHandler(handler, argv, startIndex, recursive);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/** This class is used to represent the capacity, free and used space on a
|
||||
* {@link FileSystem}.
|
||||
*/
|
||||
public class FsStatus implements Writable {
|
||||
private long capacity;
|
||||
private long used;
|
||||
private long remaining;
|
||||
|
||||
/** Construct a FsStatus object, using the specified statistics */
|
||||
public FsStatus(long capacity, long used, long remaining) {
|
||||
this.capacity = capacity;
|
||||
this.used = used;
|
||||
this.remaining = remaining;
|
||||
}
|
||||
|
||||
/** Return the capacity in bytes of the file system */
|
||||
public long getCapacity() {
|
||||
return capacity;
|
||||
}
|
||||
|
||||
/** Return the number of bytes used on the file system */
|
||||
public long getUsed() {
|
||||
return used;
|
||||
}
|
||||
|
||||
/** Return the number of remaining bytes on the file system */
|
||||
public long getRemaining() {
|
||||
return remaining;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Writable
|
||||
//////////////////////////////////////////////////
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeLong(capacity);
|
||||
out.writeLong(used);
|
||||
out.writeLong(remaining);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
capacity = in.readLong();
|
||||
used = in.readLong();
|
||||
remaining = in.readLong();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* Representation of a URL connection to open InputStreams.
|
||||
*/
|
||||
class FsUrlConnection extends URLConnection {
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
private InputStream is;
|
||||
|
||||
FsUrlConnection(Configuration conf, URL url) {
|
||||
super(url);
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void connect() throws IOException {
|
||||
try {
|
||||
FileSystem fs = FileSystem.get(url.toURI(), conf);
|
||||
is = fs.open(new Path(url.getPath()));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new IOException(e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/* @inheritDoc */
|
||||
@Override
|
||||
public InputStream getInputStream() throws IOException {
|
||||
if (is == null) {
|
||||
connect();
|
||||
}
|
||||
return is;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.net.URLStreamHandler;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* URLStream handler relying on FileSystem and on a given Configuration to
|
||||
* handle URL protocols.
|
||||
*/
|
||||
class FsUrlStreamHandler extends URLStreamHandler {
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
FsUrlStreamHandler(Configuration conf) {
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
FsUrlStreamHandler() {
|
||||
this.conf = new Configuration();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FsUrlConnection openConnection(URL url) throws IOException {
|
||||
return new FsUrlConnection(conf, url);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.net.URLStreamHandlerFactory;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* Factory for URL stream handlers.
|
||||
*
|
||||
* There is only one handler whose job is to create UrlConnections. A
|
||||
* FsUrlConnection relies on FileSystem to choose the appropriate FS
|
||||
* implementation.
|
||||
*
|
||||
* Before returning our handler, we make sure that FileSystem knows an
|
||||
* implementation for the requested scheme/protocol.
|
||||
*/
|
||||
public class FsUrlStreamHandlerFactory implements
|
||||
URLStreamHandlerFactory {
|
||||
|
||||
// The configuration holds supported FS implementation class names.
|
||||
private Configuration conf;
|
||||
|
||||
// This map stores whether a protocol is know or not by FileSystem
|
||||
private Map<String, Boolean> protocols = new HashMap<String, Boolean>();
|
||||
|
||||
// The URL Stream handler
|
||||
private java.net.URLStreamHandler handler;
|
||||
|
||||
public FsUrlStreamHandlerFactory() {
|
||||
this.conf = new Configuration();
|
||||
// force the resolution of the configuration files
|
||||
// this is required if we want the factory to be able to handle
|
||||
// file:// URLs
|
||||
this.conf.getClass("fs.file.impl", null);
|
||||
this.handler = new FsUrlStreamHandler(this.conf);
|
||||
}
|
||||
|
||||
public FsUrlStreamHandlerFactory(Configuration conf) {
|
||||
this.conf = new Configuration(conf);
|
||||
// force the resolution of the configuration files
|
||||
this.conf.getClass("fs.file.impl", null);
|
||||
this.handler = new FsUrlStreamHandler(this.conf);
|
||||
}
|
||||
|
||||
public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
|
||||
if (!protocols.containsKey(protocol)) {
|
||||
boolean known =
|
||||
(conf.getClass("fs." + protocol + ".impl", null) != null);
|
||||
protocols.put(protocol, known);
|
||||
}
|
||||
if (protocols.get(protocol)) {
|
||||
return handler;
|
||||
} else {
|
||||
// FileSystem does not know the protocol, let the VM handle this
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,166 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Expands curly-brace alternation in glob patterns, but only for brace
 * groups containing a path separator ("/"): the expanded set contains no
 * file pattern with a "/" inside a curly bracket pair.
 */
class GlobExpander {

  /** A pattern string paired with the offset expansion resumes from. */
  static class StringWithOffset {
    String string;
    int offset;
    public StringWithOffset(String string, int offset) {
      this.string = string;
      this.offset = offset;
    }
  }

  /**
   * Expand globs in the given <code>filePattern</code> into a collection
   * of file patterns so that in the expanded set no file pattern has a
   * slash character ("/") in a curly bracket pair.
   * @param filePattern pattern to expand
   * @return expanded file patterns, in derivation order
   * @throws IOException on a dangling escape character
   */
  public static List<String> expand(String filePattern) throws IOException {
    List<String> done = new ArrayList<String>();
    List<StringWithOffset> work = new ArrayList<StringWithOffset>();
    work.add(new StringWithOffset(filePattern, 0));
    while (!work.isEmpty()) {
      StringWithOffset candidate = work.remove(0);
      List<StringWithOffset> pieces = expandLeftmost(candidate);
      if (pieces == null) {
        // No brace-with-slash remains: candidate is fully expanded.
        done.add(candidate.string);
      } else {
        // Children go to the front so output preserves derivation order.
        work.addAll(0, pieces);
      }
    }
    return done;
  }

  /**
   * Expand the leftmost outer curly bracket pair containing a slash in
   * the given pattern.
   * @param patternWithOffset pattern plus resume offset
   * @return one pattern per alternative, or null if nothing to expand
   * @throws IOException on a dangling escape character
   */
  private static List<StringWithOffset> expandLeftmost(StringWithOffset
      patternWithOffset) throws IOException {

    String pattern = patternWithOffset.string;
    int start = leftmostOuterCurlyContainingSlash(pattern,
        patternWithOffset.offset);
    if (start == -1) {
      return null;
    }
    // State machine: characters are routed into head (before the group),
    // the current alternative, or tail (after the group closes).
    int depth = 0;
    StringBuilder head = new StringBuilder(pattern.substring(0, start));
    StringBuilder tail = new StringBuilder();
    List<String> branches = new ArrayList<String>();
    StringBuilder branch = new StringBuilder();
    StringBuilder target = head;
    for (int i = start; i < pattern.length(); i++) {
      char c = pattern.charAt(i);
      if (target == tail) {
        // Once the outer group has closed, everything is tail.
        target.append(c);
      } else if (c == '\\') {
        i++;
        if (i >= pattern.length()) {
          throw new IOException("Illegal file pattern: "
              + "An escaped character does not present for glob "
              + pattern + " at " + i);
        }
        c = pattern.charAt(i);
        target.append(c);
      } else if (c == '{') {
        if (depth++ == 0) {
          branch.setLength(0);
          target = branch;
        } else {
          target.append(c);
        }
      } else if (c == '}' && depth > 0) {
        if (--depth == 0) {
          branches.add(branch.toString());
          branch.setLength(0);
          target = tail;
        } else {
          target.append(c);
        }
      } else if (c == ',') {
        if (depth == 1) {
          branches.add(branch.toString());
          branch.setLength(0);
        } else {
          target.append(c);
        }
      } else {
        target.append(c);
      }
    }
    List<StringWithOffset> result = new ArrayList<StringWithOffset>();
    for (String b : branches) {
      // Offset points past the already-expanded head, so re-expansion
      // resumes inside the substituted alternative.
      result.add(new StringWithOffset(head + b + tail, head.length()));
    }
    return result;
  }

  /**
   * Finds the index of the leftmost opening curly bracket of an outer
   * pair that contains a slash character ("/").
   * @param filePattern pattern to scan
   * @param offset index scanning starts from
   * @return index of the opening bracket, or -1 if there is none
   * @throws IOException on a dangling escape character
   */
  private static int leftmostOuterCurlyContainingSlash(String filePattern,
      int offset) throws IOException {
    int depth = 0;
    int candidate = -1;
    boolean slashSeen = false;
    for (int i = offset; i < filePattern.length(); i++) {
      char c = filePattern.charAt(i);
      if (c == '\\') {
        i++;
        if (i >= filePattern.length()) {
          throw new IOException("Illegal file pattern: "
              + "An escaped character does not present for glob "
              + filePattern + " at " + i);
        }
      } else if (c == '{') {
        if (depth++ == 0) {
          candidate = i;
        }
      } else if (c == '}' && depth > 0) {
        if (--depth == 0 && candidate != -1 && slashSeen) {
          return candidate;
        }
      } else if (c == '/' && depth > 0) {
        slashSeen = true;
      }
    }
    return -1;
  }

}
|
|
@ -0,0 +1,892 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.util.LineReader;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* This is an implementation of the Hadoop Archive
|
||||
* Filesystem. This archive Filesystem has index files
|
||||
* of the form _index* and has contents of the form
|
||||
* part-*. The index files store the indexes of the
|
||||
* real files. The index files are of the form _masterindex
|
||||
* and _index. The master index is a level of indirection
|
||||
* in to the index file to make the look ups faster. the index
|
||||
* file is sorted with hash code of the paths that it contains
|
||||
* and the master index contains pointers to the positions in
|
||||
* index for ranges of hashcodes.
|
||||
*/
|
||||
|
||||
public class HarFileSystem extends FilterFileSystem {
|
||||
public static final int VERSION = 1;
|
||||
// uri representation of this Har filesystem
|
||||
private URI uri;
|
||||
// the version of this har filesystem
|
||||
private int version;
|
||||
// underlying uri
|
||||
private URI underLyingURI;
|
||||
// the top level path of the archive
|
||||
// in the underlying file system
|
||||
private Path archivePath;
|
||||
// the masterIndex of the archive
|
||||
private Path masterIndex;
|
||||
// the index file
|
||||
private Path archiveIndex;
|
||||
// the har auth
|
||||
private String harAuth;
|
||||
|
||||
/**
 * Default constructor, used when the file system is created reflectively;
 * initialize(URI, Configuration) must be called before use.
 */
public HarFileSystem() {
}
|
||||
|
||||
/**
 * Constructor to create a HarFileSystem with an
 * underlying filesystem.
 * @param fs the underlying file system that holds the archive
 */
public HarFileSystem(FileSystem fs) {
  super(fs);
}
|
||||
|
||||
/**
 * Initialize a Har filesystem per har archive.  The archive home directory
 * is the top level directory in the underlying filesystem that contains
 * the HAR archive.  Be careful with this method: you do not want to go on
 * creating new Filesystem instances per call to path.getFileSystem().
 * The har URI is of the form
 * har://underlyingfsscheme-host:port/archivepath, or har:///archivepath,
 * which assumes the configured default underlying filesystem.
 *
 * @param name the har URI to mount
 * @param conf configuration used to resolve the underlying file system
 * @throws IOException if the path is not a valid har archive or the
 *         archive's version cannot be read or is unsupported
 */
public void initialize(URI name, Configuration conf) throws IOException {
  // Decode the har URI into the underlying file system's URI.
  underLyingURI = decodeHarURI(name, conf);
  // Locate the ".har" ancestor; without one this is not a har path.
  Path harPath = archivePath(new Path(name.toString()));
  if (harPath == null) {
    throw new IOException("Invalid path for the Har Filesystem. " +
                       name.toString());
  }
  if (fs == null) {
    fs = FileSystem.get(underLyingURI, conf);
  }
  this.uri = harPath.toUri();
  this.archivePath = new Path(this.uri.getPath());
  this.harAuth = getHarAuth(this.underLyingURI);
  // A valid archive must contain both the master index and the index.
  this.masterIndex = new Path(archivePath, "_masterindex");
  this.archiveIndex = new Path(archivePath, "_index");
  if (!fs.exists(masterIndex) || !fs.exists(archiveIndex)) {
    throw new IOException("Invalid path for the Har Filesystem. " +
        "No index file in " + harPath);
  }
  try{
    this.version = getHarVersion();
  } catch(IOException io) {
    throw new IOException("Unable to " +
        "read the version of the Har file system: " + this.archivePath);
  }
  if (this.version != HarFileSystem.VERSION) {
    throw new IOException("Invalid version " +
        this.version + " expected " + HarFileSystem.VERSION);
  }
}
|
||||
|
||||
// get the version of the filesystem from the masterindex file
|
||||
// the version is currently not useful since its the first version
|
||||
// of archives
|
||||
public int getHarVersion() throws IOException {
|
||||
FSDataInputStream masterIn = fs.open(masterIndex);
|
||||
LineReader lmaster = new LineReader(masterIn, getConf());
|
||||
Text line = new Text();
|
||||
lmaster.readLine(line);
|
||||
try {
|
||||
masterIn.close();
|
||||
} catch(IOException e){
|
||||
//disregard it.
|
||||
// its a read.
|
||||
}
|
||||
String versionLine = line.toString();
|
||||
String[] arr = versionLine.split(" ");
|
||||
int version = Integer.parseInt(arr[0]);
|
||||
return version;
|
||||
}
|
||||
|
||||
/*
|
||||
* find the parent path that is the
|
||||
* archive path in the path. The last
|
||||
* path segment that ends with .har is
|
||||
* the path that will be returned.
|
||||
*/
|
||||
private Path archivePath(Path p) {
|
||||
Path retPath = null;
|
||||
Path tmp = p;
|
||||
for (int i=0; i< p.depth(); i++) {
|
||||
if (tmp.toString().endsWith(".har")) {
|
||||
retPath = tmp;
|
||||
break;
|
||||
}
|
||||
tmp = tmp.getParent();
|
||||
}
|
||||
return retPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* decode the raw URI to get the underlying URI
|
||||
* @param rawURI raw Har URI
|
||||
* @return filtered URI of the underlying fileSystem
|
||||
*/
|
||||
private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
|
||||
String tmpAuth = rawURI.getAuthority();
|
||||
//we are using the default file
|
||||
//system in the config
|
||||
//so create a underlying uri and
|
||||
//return it
|
||||
if (tmpAuth == null) {
|
||||
//create a path
|
||||
return FileSystem.getDefaultUri(conf);
|
||||
}
|
||||
String host = rawURI.getHost();
|
||||
String[] str = host.split("-", 2);
|
||||
if (str[0] == null) {
|
||||
throw new IOException("URI: " + rawURI + " is an invalid Har URI.");
|
||||
}
|
||||
String underLyingScheme = str[0];
|
||||
String underLyingHost = (str.length > 1)? str[1]:null;
|
||||
int underLyingPort = rawURI.getPort();
|
||||
String auth = (underLyingHost == null && underLyingPort == -1)?
|
||||
null:(underLyingHost+":"+underLyingPort);
|
||||
URI tmp = null;
|
||||
if (rawURI.getQuery() != null) {
|
||||
// query component not allowed
|
||||
throw new IOException("query component in Path not supported " + rawURI);
|
||||
}
|
||||
try {
|
||||
tmp = new URI(underLyingScheme, auth, rawURI.getPath(),
|
||||
rawURI.getQuery(), rawURI.getFragment());
|
||||
} catch (URISyntaxException e) {
|
||||
// do nothing should not happen
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the top level archive.
|
||||
*/
|
||||
public Path getWorkingDirectory() {
|
||||
return new Path(uri.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a har specific auth
|
||||
* har-underlyingfs:port
|
||||
* @param underLyingURI the uri of underlying
|
||||
* filesystem
|
||||
* @return har specific auth
|
||||
*/
|
||||
private String getHarAuth(URI underLyingUri) {
|
||||
String auth = underLyingUri.getScheme() + "-";
|
||||
if (underLyingUri.getHost() != null) {
|
||||
auth += underLyingUri.getHost() + ":";
|
||||
if (underLyingUri.getPort() != -1) {
|
||||
auth += underLyingUri.getPort();
|
||||
}
|
||||
}
|
||||
else {
|
||||
auth += ":";
|
||||
}
|
||||
return auth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the uri of this filesystem.
|
||||
* The uri is of the form
|
||||
* har://underlyingfsschema-host:port/pathintheunderlyingfs
|
||||
*/
|
||||
@Override
|
||||
public URI getUri() {
|
||||
return this.uri;
|
||||
}
|
||||
|
||||
/**
 * this method returns the path
 * inside the har filesystem.
 * this is relative path inside
 * the har filesystem.
 * @param path the fully qualified path in the har filesystem.
 * @return relative path in the filesystem, or null when the path is not
 *         under the archive path.
 */
private Path getPathInHar(Path path) {
  Path harPath = new Path(path.toUri().getPath());
  // The archive root itself maps to "/".
  if (archivePath.compareTo(harPath) == 0)
    return new Path(Path.SEPARATOR);
  // Walk up from the leaf, rebuilding the relative path until the
  // archive path is reached; reaching "/" first means the input lies
  // outside the archive.
  Path tmp = new Path(harPath.getName());
  Path parent = harPath.getParent();
  while (!(parent.compareTo(archivePath) == 0)) {
    if (parent.toString().equals(Path.SEPARATOR)) {
      tmp = null;
      break;
    }
    tmp = new Path(parent.getName(), tmp);
    parent = parent.getParent();
  }
  if (tmp != null)
    tmp = new Path(Path.SEPARATOR, tmp);
  return tmp;
}
|
||||
|
||||
//the relative path of p. basically
// getting rid of /. Parsing and doing
// string manipulation is not good - so
// just use the path api to do it.
// Re-roots absolute path p under the directory named by "initial" by
// rebuilding p's segments from the leaf upward.
private Path makeRelative(String initial, Path p) {
  Path root = new Path(Path.SEPARATOR);
  // "/" relative to initial is initial itself.
  if (root.compareTo(p) == 0)
    return new Path(initial);
  Path retPath = new Path(p.getName());
  Path parent = p.getParent();
  // depth()-1 iterations rebuild every segment except the leading "/".
  for (int i=0; i < p.depth()-1; i++) {
    retPath = new Path(parent.getName(), retPath);
    parent = parent.getParent();
  }
  return new Path(initial, retPath.toString());
}
|
||||
|
||||
/* this makes a path qualified in the har filesystem
 * (non-Javadoc)
 * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
 * org.apache.hadoop.fs.Path)
 */
@Override
public Path makeQualified(Path path) {
  // make sure that we just get the
  // path component
  Path fsPath = path;
  if (!path.isAbsolute()) {
    fsPath = new Path(archivePath, path);
  }

  URI tmpURI = fsPath.toUri();
  fsPath = new Path(tmpURI.getPath());
  //change this to Har uri
  URI tmp = null;
  try {
    tmp = new URI(uri.getScheme(), harAuth, fsPath.toString(),
                  tmpURI.getQuery(), tmpURI.getFragment());
  } catch(URISyntaxException ue) {
    // NOTE(review): on a malformed rebuilt URI this logs and falls
    // through to return null — callers appear to assume a non-null
    // result; confirm whether this path is reachable.
    LOG.error("Error in URI ", ue);
  }
  if (tmp != null) {
    return new Path(tmp.toString());
  }
  return null;
}
|
||||
|
||||
/**
 * get block locations from the underlying fs by looking the file up in
 * the archive index, then translating block offsets from the shared
 * part-* file back into offsets within the archived file.
 * @param file the input filestatus to get block locations
 * @param start the start in the file
 * @param len the length in the file
 * @return block locations for this segment of file
 * @throws IOException if the file is not found in the index
 */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
    long len) throws IOException {
  // need to look up the file in the underlying fs
  // look up the index

  // make sure this is a part of this har filesystem
  Path p = makeQualified(file.getPath());
  Path harPath = getPathInHar(p);
  String line = fileStatusInIndex(harPath);
  if (line == null) {
    throw new FileNotFoundException("File " + file.getPath() + " not found");
  }
  HarStatus harStatus = new HarStatus(line);
  // Directories have no data blocks.
  if (harStatus.isDir()) {
    return new BlockLocation[0];
  }
  FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
      harStatus.getPartName()));
  // Query the part file at the archived file's offset, then shift the
  // returned offsets so they are relative to the archived file.
  BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile,
      harStatus.getStartIndex() + start, len);
  return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
}
|
||||
|
||||
/**
|
||||
* fake the rawblocks since map reduce uses the block offsets to
|
||||
* fo some computations regarding the blocks
|
||||
* @param rawBlocks the raw blocks returned by the filesystem
|
||||
* @return faked blocks with changed offsets.
|
||||
*/
|
||||
private BlockLocation[] fakeBlockLocations(BlockLocation[] rawBlocks,
|
||||
long startIndex) {
|
||||
for (BlockLocation block : rawBlocks) {
|
||||
long rawOffset = block.getOffset();
|
||||
block.setOffset(rawOffset - startIndex);
|
||||
}
|
||||
return rawBlocks;
|
||||
}
|
||||
|
||||
/**
|
||||
* the hash of the path p inside iniside
|
||||
* the filesystem
|
||||
* @param p the path in the harfilesystem
|
||||
* @return the hash code of the path.
|
||||
*/
|
||||
public static int getHarHash(Path p) {
|
||||
return (p.toString().hashCode() & 0x7fffffff);
|
||||
}
|
||||
|
||||
static class Store {
|
||||
public Store() {
|
||||
begin = end = startHash = endHash = 0;
|
||||
}
|
||||
public Store(long begin, long end, int startHash, int endHash) {
|
||||
this.begin = begin;
|
||||
this.end = end;
|
||||
this.startHash = startHash;
|
||||
this.endHash = endHash;
|
||||
}
|
||||
public long begin;
|
||||
public long end;
|
||||
public int startHash;
|
||||
public int endHash;
|
||||
}
|
||||
|
||||
// make sure that this harPath is relative to the har filesystem
// this only works for relative paths. This returns the line matching
// the file in the index. Returns a null if there is not matching
// filename in the index file.
private String fileStatusInIndex(Path harPath) throws IOException {
  // read the index file
  int hashCode = getHarHash(harPath);
  // get the master index to find the pos
  // in the index file
  FSDataInputStream in = fs.open(masterIndex);
  FileStatus masterStat = fs.getFileStatus(masterIndex);
  LineReader lin = new LineReader(in, getConf());
  Text line = new Text();
  long read = lin.readLine(line);
  //ignore the first line. this is the header of the index files
  String[] readStr = null;
  List<Store> stores = new ArrayList<Store>();
  // collect every index-file range whose hash bucket could contain
  // this path's hash
  while(read < masterStat.getLen()) {
    int b = lin.readLine(line);
    read += b;
    readStr = line.toString().split(" ");
    int startHash = Integer.parseInt(readStr[0]);
    int endHash = Integer.parseInt(readStr[1]);
    if (startHash <= hashCode && hashCode <= endHash) {
      stores.add(new Store(Long.parseLong(readStr[2]),
          Long.parseLong(readStr[3]), startHash,
          endHash));
    }
    line.clear();
  }
  try {
    lin.close();
  } catch(IOException io){
    // do nothing just a read.
  }
  FSDataInputStream aIn = fs.open(archiveIndex);
  LineReader aLin = new LineReader(aIn, getConf());
  String retStr = null;
  // now start reading the real index file
  // NOTE(review): read accumulates across stores instead of being
  // reset for each store; with more than one candidate store the
  // bound "read + s.begin < s.end" may cut later scans short — confirm.
  read = 0;
  for (Store s: stores) {
    aIn.seek(s.begin);
    // scan the store's range line by line looking for an exact
    // filename match
    while (read + s.begin < s.end) {
      int tmp = aLin.readLine(line);
      read += tmp;
      String lineFeed = line.toString();
      String[] parsed = lineFeed.split(" ");
      if (harPath.compareTo(new Path(parsed[0])) == 0) {
        // bingo!
        retStr = lineFeed;
        break;
      }
      line.clear();
    }
    if (retStr != null)
      break;
  }
  try {
    aIn.close();
  } catch(IOException io) {
    //do nothing
  }
  return retStr;
}
|
||||
|
||||
// a single line parser for hadoop archives status
|
||||
// stored in a single line in the index files
|
||||
// the format is of the form
|
||||
// filename "dir"/"file" partFileName startIndex length
|
||||
// <space seperated children>
|
||||
private static class HarStatus {
|
||||
boolean isDir;
|
||||
String name;
|
||||
List<String> children;
|
||||
String partName;
|
||||
long startIndex;
|
||||
long length;
|
||||
public HarStatus(String harString) {
|
||||
String[] splits = harString.split(" ");
|
||||
this.name = splits[0];
|
||||
this.isDir = "dir".equals(splits[1]) ? true: false;
|
||||
// this is equal to "none" if its a directory
|
||||
this.partName = splits[2];
|
||||
this.startIndex = Long.parseLong(splits[3]);
|
||||
this.length = Long.parseLong(splits[4]);
|
||||
if (isDir) {
|
||||
children = new ArrayList<String>();
|
||||
for (int i = 5; i < splits.length; i++) {
|
||||
children.add(splits[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
public boolean isDir() {
|
||||
return isDir;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public List<String> getChildren() {
|
||||
return children;
|
||||
}
|
||||
public String getFileName() {
|
||||
return name;
|
||||
}
|
||||
public String getPartName() {
|
||||
return partName;
|
||||
}
|
||||
public long getStartIndex() {
|
||||
return startIndex;
|
||||
}
|
||||
public long getLength() {
|
||||
return length;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* return the filestatus of files in har archive.
|
||||
* The permission returned are that of the archive
|
||||
* index files. The permissions are not persisted
|
||||
* while creating a hadoop archive.
|
||||
* @param f the path in har filesystem
|
||||
* @return filestatus.
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public FileStatus getFileStatus(Path f) throws IOException {
|
||||
FileStatus archiveStatus = fs.getFileStatus(archiveIndex);
|
||||
// get the fs DataInputStream for the underlying file
|
||||
// look up the index.
|
||||
Path p = makeQualified(f);
|
||||
Path harPath = getPathInHar(p);
|
||||
if (harPath == null) {
|
||||
throw new IOException("Invalid file name: " + f + " in " + uri);
|
||||
}
|
||||
String readStr = fileStatusInIndex(harPath);
|
||||
if (readStr == null) {
|
||||
throw new FileNotFoundException("File: " + f + " does not exist in " + uri);
|
||||
}
|
||||
HarStatus hstatus = null;
|
||||
hstatus = new HarStatus(readStr);
|
||||
return new FileStatus(hstatus.isDir()?0:hstatus.getLength(), hstatus.isDir(),
|
||||
(int)archiveStatus.getReplication(), archiveStatus.getBlockSize(),
|
||||
archiveStatus.getModificationTime(), archiveStatus.getAccessTime(),
|
||||
new FsPermission(
|
||||
archiveStatus.getPermission()), archiveStatus.getOwner(),
|
||||
archiveStatus.getGroup(),
|
||||
makeRelative(this.uri.toString(), new Path(hstatus.name)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a har input stream which fakes end of
|
||||
* file. It reads the index files to get the part
|
||||
* file name and the size and start of the file.
|
||||
*/
|
||||
@Override
|
||||
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
|
||||
// get the fs DataInputStream for the underlying file
|
||||
// look up the index.
|
||||
Path p = makeQualified(f);
|
||||
Path harPath = getPathInHar(p);
|
||||
if (harPath == null) {
|
||||
throw new IOException("Invalid file name: " + f + " in " + uri);
|
||||
}
|
||||
String readStr = fileStatusInIndex(harPath);
|
||||
if (readStr == null) {
|
||||
throw new FileNotFoundException(f + ": not found in " + archivePath);
|
||||
}
|
||||
HarStatus hstatus = new HarStatus(readStr);
|
||||
// we got it.. woo hooo!!!
|
||||
if (hstatus.isDir()) {
|
||||
throw new FileNotFoundException(f + " : not a file in " +
|
||||
archivePath);
|
||||
}
|
||||
return new HarFSDataInputStream(fs, new Path(archivePath,
|
||||
hstatus.getPartName()),
|
||||
hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
|
||||
}
|
||||
|
||||
/*
|
||||
* create throws an exception in Har filesystem.
|
||||
* The archive once created cannot be changed.
|
||||
*/
|
||||
public FSDataOutputStream create(Path f, int bufferSize)
|
||||
throws IOException {
|
||||
throw new IOException("Har: Create not allowed");
|
||||
}
|
||||
|
||||
public FSDataOutputStream create(Path f,
|
||||
FsPermission permission,
|
||||
boolean overwrite,
|
||||
int bufferSize,
|
||||
short replication,
|
||||
long blockSize,
|
||||
Progressable progress) throws IOException {
|
||||
throw new IOException("Har: create not allowed.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (fs != null) {
|
||||
try {
|
||||
fs.close();
|
||||
} catch(IOException ie) {
|
||||
//this might already be closed
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Not implemented.
|
||||
*/
|
||||
@Override
|
||||
public boolean setReplication(Path src, short replication) throws IOException{
|
||||
throw new IOException("Har: setreplication not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* Not implemented.
|
||||
*/
|
||||
@Override
|
||||
public boolean delete(Path f, boolean recursive) throws IOException {
|
||||
throw new IOException("Har: delete not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* liststatus returns the children of a directory
|
||||
* after looking up the index files.
|
||||
*/
|
||||
@Override
|
||||
public FileStatus[] listStatus(Path f) throws IOException {
|
||||
//need to see if the file is an index in file
|
||||
//get the filestatus of the archive directory
|
||||
// we will create fake filestatuses to return
|
||||
// to the client
|
||||
List<FileStatus> statuses = new ArrayList<FileStatus>();
|
||||
FileStatus archiveStatus = fs.getFileStatus(archiveIndex);
|
||||
Path tmpPath = makeQualified(f);
|
||||
Path harPath = getPathInHar(tmpPath);
|
||||
String readStr = fileStatusInIndex(harPath);
|
||||
if (readStr == null) {
|
||||
throw new FileNotFoundException("File " + f + " not found in " + archivePath);
|
||||
}
|
||||
HarStatus hstatus = new HarStatus(readStr);
|
||||
if (!hstatus.isDir())
|
||||
statuses.add(new FileStatus(hstatus.getLength(),
|
||||
hstatus.isDir(),
|
||||
archiveStatus.getReplication(), archiveStatus.getBlockSize(),
|
||||
archiveStatus.getModificationTime(), archiveStatus.getAccessTime(),
|
||||
new FsPermission(archiveStatus.getPermission()),
|
||||
archiveStatus.getOwner(), archiveStatus.getGroup(),
|
||||
makeRelative(this.uri.toString(), new Path(hstatus.name))));
|
||||
else
|
||||
for (String child: hstatus.children) {
|
||||
FileStatus tmp = getFileStatus(new Path(tmpPath, child));
|
||||
statuses.add(tmp);
|
||||
}
|
||||
return statuses.toArray(new FileStatus[statuses.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* return the top level archive path.
|
||||
*/
|
||||
public Path getHomeDirectory() {
|
||||
return new Path(uri.toString());
|
||||
}
|
||||
|
||||
public void setWorkingDirectory(Path newDir) {
|
||||
//does nothing.
|
||||
}
|
||||
|
||||
/**
|
||||
* not implemented.
|
||||
*/
|
||||
public boolean mkdirs(Path f, FsPermission permission) throws IOException {
|
||||
throw new IOException("Har: mkdirs not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* not implemented.
|
||||
*/
|
||||
public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws
|
||||
IOException {
|
||||
throw new IOException("Har: copyfromlocalfile not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* copies the file in the har filesystem to a local file.
|
||||
*/
|
||||
public void copyToLocalFile(boolean delSrc, Path src, Path dst)
|
||||
throws IOException {
|
||||
FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
|
||||
}
|
||||
|
||||
/**
|
||||
* not implemented.
|
||||
*/
|
||||
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
|
||||
throws IOException {
|
||||
throw new IOException("Har: startLocalOutput not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* not implemented.
|
||||
*/
|
||||
public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
|
||||
throws IOException {
|
||||
throw new IOException("Har: completeLocalOutput not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* not implemented.
|
||||
*/
|
||||
public void setOwner(Path p, String username, String groupname)
|
||||
throws IOException {
|
||||
throw new IOException("Har: setowner not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* Not implemented.
|
||||
*/
|
||||
public void setPermission(Path p, FsPermission permisssion)
|
||||
throws IOException {
|
||||
throw new IOException("Har: setPermission not allowed");
|
||||
}
|
||||
|
||||
/**
|
||||
* Hadoop archives input stream. This input stream fakes EOF
|
||||
* since archive files are part of bigger part files.
|
||||
*/
|
||||
private static class HarFSDataInputStream extends FSDataInputStream {
|
||||
/**
|
||||
* Create an input stream that fakes all the reads/positions/seeking.
|
||||
*/
|
||||
private static class HarFsInputStream extends FSInputStream {
|
||||
private long position, start, end;
|
||||
//The underlying data input stream that the
|
||||
// underlying filesystem will return.
|
||||
private FSDataInputStream underLyingStream;
|
||||
//one byte buffer
|
||||
private byte[] oneBytebuff = new byte[1];
|
||||
HarFsInputStream(FileSystem fs, Path path, long start,
|
||||
long length, int bufferSize) throws IOException {
|
||||
underLyingStream = fs.open(path, bufferSize);
|
||||
underLyingStream.seek(start);
|
||||
// the start of this file in the part file
|
||||
this.start = start;
|
||||
// the position pointer in the part file
|
||||
this.position = start;
|
||||
// the end pointer in the part file
|
||||
this.end = start + length;
|
||||
}
|
||||
|
||||
public synchronized int available() throws IOException {
|
||||
long remaining = end - underLyingStream.getPos();
|
||||
if (remaining > (long)Integer.MAX_VALUE) {
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
return (int) remaining;
|
||||
}
|
||||
|
||||
public synchronized void close() throws IOException {
|
||||
underLyingStream.close();
|
||||
super.close();
|
||||
}
|
||||
|
||||
//not implemented
|
||||
@Override
|
||||
public void mark(int readLimit) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
/**
|
||||
* reset is not implemented
|
||||
*/
|
||||
public void reset() throws IOException {
|
||||
throw new IOException("reset not implemented.");
|
||||
}
|
||||
|
||||
public synchronized int read() throws IOException {
|
||||
int ret = read(oneBytebuff, 0, 1);
|
||||
return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
|
||||
}
|
||||
|
||||
public synchronized int read(byte[] b) throws IOException {
|
||||
int ret = read(b, 0, b.length);
|
||||
if (ret != -1) {
|
||||
position += ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public synchronized int read(byte[] b, int offset, int len)
|
||||
throws IOException {
|
||||
int newlen = len;
|
||||
int ret = -1;
|
||||
if (position + len > end) {
|
||||
newlen = (int) (end - position);
|
||||
}
|
||||
// end case
|
||||
if (newlen == 0)
|
||||
return ret;
|
||||
ret = underLyingStream.read(b, offset, newlen);
|
||||
position += ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
public synchronized long skip(long n) throws IOException {
|
||||
long tmpN = n;
|
||||
if (tmpN > 0) {
|
||||
if (position + tmpN > end) {
|
||||
tmpN = end - position;
|
||||
}
|
||||
underLyingStream.seek(tmpN + position);
|
||||
position += tmpN;
|
||||
return tmpN;
|
||||
}
|
||||
return (tmpN < 0)? -1 : 0;
|
||||
}
|
||||
|
||||
public synchronized long getPos() throws IOException {
|
||||
return (position - start);
|
||||
}
|
||||
|
||||
public synchronized void seek(long pos) throws IOException {
|
||||
if (pos < 0 || (start + pos > end)) {
|
||||
throw new IOException("Failed to seek: EOF");
|
||||
}
|
||||
position = start + pos;
|
||||
underLyingStream.seek(position);
|
||||
}
|
||||
|
||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
||||
//do not need to implement this
|
||||
// hdfs in itself does seektonewsource
|
||||
// while reading.
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* implementing position readable.
|
||||
*/
|
||||
public int read(long pos, byte[] b, int offset, int length)
|
||||
throws IOException {
|
||||
int nlength = length;
|
||||
if (start + nlength + pos > end) {
|
||||
nlength = (int) (end - (start + pos));
|
||||
}
|
||||
return underLyingStream.read(pos + start , b, offset, nlength);
|
||||
}
|
||||
|
||||
/**
|
||||
* position readable again.
|
||||
*/
|
||||
public void readFully(long pos, byte[] b, int offset, int length)
|
||||
throws IOException {
|
||||
if (start + length + pos > end) {
|
||||
throw new IOException("Not enough bytes to read.");
|
||||
}
|
||||
underLyingStream.readFully(pos + start, b, offset, length);
|
||||
}
|
||||
|
||||
public void readFully(long pos, byte[] b) throws IOException {
|
||||
readFully(pos, b, 0, b.length);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* constructors for har input stream.
|
||||
* @param fs the underlying filesystem
|
||||
* @param p The path in the underlying filesystem
|
||||
* @param start the start position in the part file
|
||||
* @param length the length of valid data in the part file
|
||||
* @param bufsize the buffer size
|
||||
* @throws IOException
|
||||
*/
|
||||
public HarFSDataInputStream(FileSystem fs, Path p, long start,
|
||||
long length, int bufsize) throws IOException {
|
||||
super(new HarFsInputStream(fs, p, start, length, bufsize));
|
||||
}
|
||||
|
||||
/**
|
||||
* constructor for har input stream.
|
||||
* @param fs the underlying filesystem
|
||||
* @param p the path in the underlying file system
|
||||
* @param start the start position in the part file
|
||||
* @param length the length of valid data in the part file.
|
||||
* @throws IOException
|
||||
*/
|
||||
public HarFSDataInputStream(FileSystem fs, Path p, long start, long length)
|
||||
throws IOException {
|
||||
super(new HarFsInputStream(fs, p, start, length, 0));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,418 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.logging.*;
|
||||
|
||||
import org.apache.hadoop.util.*;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/** An implementation of a round-robin scheme for disk allocation for creating
|
||||
* files. The way it works is that it is kept track what disk was last
|
||||
* allocated for a file write. For the current request, the next disk from
|
||||
* the set of disks would be allocated if the free space on the disk is
|
||||
 * sufficient to accommodate the file that is being considered for
|
||||
* creation. If the space requirements cannot be met, the next disk in order
|
||||
* would be tried and so on till a disk is found with sufficient capacity.
|
||||
* Once a disk with sufficient space is identified, a check is done to make
|
||||
* sure that the disk is writable. Also, there is an API provided that doesn't
|
||||
* take the space requirements into consideration but just checks whether the
|
||||
* disk under consideration is writable (this should be used for cases where
|
||||
* the file size is not known apriori). An API is provided to read a path that
|
||||
* was created earlier. That API works by doing a scan of all the disks for the
|
||||
* input pathname.
|
||||
* This implementation also provides the functionality of having multiple
|
||||
* allocators per JVM (one for each unique functionality or context, like
|
||||
* mapred, dfs-client, etc.). It ensures that there is only one instance of
|
||||
* an allocator per context per JVM.
|
||||
* Note:
|
||||
* 1. The contexts referred above are actually the configuration items defined
|
||||
* in the Configuration class like "mapred.local.dir" (for which we want to
|
||||
* control the dir allocations). The context-strings are exactly those
|
||||
* configuration items.
|
||||
* 2. This implementation does not take into consideration cases where
|
||||
* a disk becomes read-only or goes out of space while a file is being written
|
||||
* to (disks are shared between multiple processes, and so the latter situation
|
||||
* is probable).
|
||||
* 3. In the class implementation, "Disk" is referred to as "Dir", which
|
||||
* actually points to the configured directory on the Disk which will be the
|
||||
* parent for all file write/read allocations.
|
||||
*/
|
||||
public class LocalDirAllocator {
|
||||
|
||||
//A Map from the config item names like "mapred.local.dir",
//"dfs.client.buffer.dir" to the instance of the AllocatorPerContext. This
//is a static object to make sure there exists exactly one instance per JVM
private static Map <String, AllocatorPerContext> contexts =
    new TreeMap<String, AllocatorPerContext>();
// the configuration item (e.g. "mapred.local.dir") this allocator manages
private String contextCfgItemName;
|
||||
|
||||
/**
 * Create an allocator for the directories configured under the
 * given configuration item (e.g. "mapred.local.dir").
 * @param contextCfgItemName the configuration item name
 */
public LocalDirAllocator(String contextCfgItemName) {
  this.contextCfgItemName = contextCfgItemName;
}
|
||||
|
||||
/** This method must be used to obtain the dir allocation context for a
|
||||
* particular value of the context name. The context name must be an item
|
||||
* defined in the Configuration object for which we want to control the
|
||||
* dir allocations (e.g., <code>mapred.local.dir</code>). The method will
|
||||
* create a context for that name if it doesn't already exist.
|
||||
*/
|
||||
private AllocatorPerContext obtainContext(String contextCfgItemName) {
|
||||
synchronized (contexts) {
|
||||
AllocatorPerContext l = contexts.get(contextCfgItemName);
|
||||
if (l == null) {
|
||||
contexts.put(contextCfgItemName,
|
||||
(l = new AllocatorPerContext(contextCfgItemName)));
|
||||
}
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
/** Get a path from the local FS. This method should be used if the size of
|
||||
* the file is not known apriori. We go round-robin over the set of disks
|
||||
* (via the configured dirs) and return the first complete path where
|
||||
* we could create the parent directory of the passed path.
|
||||
* @param pathStr the requested path (this will be created on the first
|
||||
* available disk)
|
||||
* @param conf the Configuration object
|
||||
* @return the complete path to the file on a local disk
|
||||
* @throws IOException
|
||||
*/
|
||||
public Path getLocalPathForWrite(String pathStr,
|
||||
Configuration conf) throws IOException {
|
||||
return getLocalPathForWrite(pathStr, -1, conf);
|
||||
}
|
||||
|
||||
/** Get a path from the local FS. Pass size as -1 if not known apriori. We
|
||||
* round-robin over the set of disks (via the configured dirs) and return
|
||||
* the first complete path which has enough space
|
||||
* @param pathStr the requested path (this will be created on the first
|
||||
* available disk)
|
||||
* @param size the size of the file that is going to be written
|
||||
* @param conf the Configuration object
|
||||
* @return the complete path to the file on a local disk
|
||||
* @throws IOException
|
||||
*/
|
||||
public Path getLocalPathForWrite(String pathStr, long size,
|
||||
Configuration conf) throws IOException {
|
||||
AllocatorPerContext context = obtainContext(contextCfgItemName);
|
||||
return context.getLocalPathForWrite(pathStr, size, conf);
|
||||
}
|
||||
|
||||
/** Get a path from the local FS for reading. We search through all the
|
||||
* configured dirs for the file's existence and return the complete
|
||||
* path to the file when we find one
|
||||
* @param pathStr the requested file (this will be searched)
|
||||
* @param conf the Configuration object
|
||||
* @return the complete path to the file on a local disk
|
||||
* @throws IOException
|
||||
*/
|
||||
public Path getLocalPathToRead(String pathStr,
|
||||
Configuration conf) throws IOException {
|
||||
AllocatorPerContext context = obtainContext(contextCfgItemName);
|
||||
return context.getLocalPathToRead(pathStr, conf);
|
||||
}
|
||||
|
||||
/** Creates a temporary file in the local FS. Pass size as -1 if not known
|
||||
* apriori. We round-robin over the set of disks (via the configured dirs)
|
||||
* and select the first complete path which has enough space. A file is
|
||||
* created on this directory. The file is guaranteed to go away when the
|
||||
* JVM exits.
|
||||
* @param pathStr prefix for the temporary file
|
||||
* @param size the size of the file that is going to be written
|
||||
* @param conf the Configuration object
|
||||
* @return a unique temporary file
|
||||
* @throws IOException
|
||||
*/
|
||||
public File createTmpFileForWrite(String pathStr, long size,
|
||||
Configuration conf) throws IOException {
|
||||
AllocatorPerContext context = obtainContext(contextCfgItemName);
|
||||
return context.createTmpFileForWrite(pathStr, size, conf);
|
||||
}
|
||||
|
||||
/** Method to check whether a context is valid
|
||||
* @param contextCfgItemName
|
||||
* @return true/false
|
||||
*/
|
||||
public static boolean isContextValid(String contextCfgItemName) {
|
||||
synchronized (contexts) {
|
||||
return contexts.containsKey(contextCfgItemName);
|
||||
}
|
||||
}
|
||||
|
||||
/** We search through all the configured dirs for the file's existence
|
||||
* and return true when we find
|
||||
* @param pathStr the requested file (this will be searched)
|
||||
* @param conf the Configuration object
|
||||
* @return true if files exist. false otherwise
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean ifExists(String pathStr,Configuration conf) {
|
||||
AllocatorPerContext context = obtainContext(contextCfgItemName);
|
||||
return context.ifExists(pathStr, conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current directory index for the given configuration item.
|
||||
* @return the current directory index for the given configuration item.
|
||||
*/
|
||||
int getCurrentDirectoryIndex() {
|
||||
AllocatorPerContext context = obtainContext(contextCfgItemName);
|
||||
return context.getCurrentDirectoryIndex();
|
||||
}
|
||||
|
||||
private static class AllocatorPerContext {
|
||||
|
||||
private final Log LOG =
    LogFactory.getLog(AllocatorPerContext.class);

// index of the directory used for the most recent allocation
private int dirNumLastAccessed;
// randomizes the first dir picked after a configuration change
private Random dirIndexRandomizer = new Random();
// local filesystem used for mkdirs/exists checks
private FileSystem localFS;
// one DF (disk-free) probe per usable configured directory
private DF[] dirDF;
// the configuration item (e.g. "mapred.local.dir") being managed
private String contextCfgItemName;
// the currently usable directories, filtered from the configuration
private String[] localDirs;
// the raw configured value last seen, used to detect config changes
private String savedLocalDirs = "";
|
||||
|
||||
/**
 * @param contextCfgItemName the configuration item whose
 *        directories this context manages
 */
public AllocatorPerContext(String contextCfgItemName) {
  this.contextCfgItemName = contextCfgItemName;
}
|
||||
|
||||
/** This method gets called everytime before any read/write to make sure
|
||||
* that any change to localDirs is reflected immediately.
|
||||
*/
|
||||
private void confChanged(Configuration conf) throws IOException {
|
||||
String newLocalDirs = conf.get(contextCfgItemName);
|
||||
if (!newLocalDirs.equals(savedLocalDirs)) {
|
||||
localDirs = conf.getStrings(contextCfgItemName);
|
||||
localFS = FileSystem.getLocal(conf);
|
||||
int numDirs = localDirs.length;
|
||||
ArrayList<String> dirs = new ArrayList<String>(numDirs);
|
||||
ArrayList<DF> dfList = new ArrayList<DF>(numDirs);
|
||||
for (int i = 0; i < numDirs; i++) {
|
||||
try {
|
||||
// filter problematic directories
|
||||
Path tmpDir = new Path(localDirs[i]);
|
||||
if(localFS.mkdirs(tmpDir)|| localFS.exists(tmpDir)) {
|
||||
try {
|
||||
DiskChecker.checkDir(new File(localDirs[i]));
|
||||
dirs.add(localDirs[i]);
|
||||
dfList.add(new DF(new File(localDirs[i]), 30000));
|
||||
} catch (DiskErrorException de) {
|
||||
LOG.warn( localDirs[i] + "is not writable\n" +
|
||||
StringUtils.stringifyException(de));
|
||||
}
|
||||
} else {
|
||||
LOG.warn( "Failed to create " + localDirs[i]);
|
||||
}
|
||||
} catch (IOException ie) {
|
||||
LOG.warn( "Failed to create " + localDirs[i] + ": " +
|
||||
ie.getMessage() + "\n" + StringUtils.stringifyException(ie));
|
||||
} //ignore
|
||||
}
|
||||
localDirs = dirs.toArray(new String[dirs.size()]);
|
||||
dirDF = dfList.toArray(new DF[dirs.size()]);
|
||||
savedLocalDirs = newLocalDirs;
|
||||
|
||||
// randomize the first disk picked in the round-robin selection
|
||||
dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size());
|
||||
}
|
||||
}
|
||||
|
||||
private Path createPath(String path) throws IOException {
|
||||
Path file = new Path(new Path(localDirs[dirNumLastAccessed]),
|
||||
path);
|
||||
//check whether we are able to create a directory here. If the disk
|
||||
//happens to be RDONLY we will fail
|
||||
try {
|
||||
DiskChecker.checkDir(new File(file.getParent().toUri().getPath()));
|
||||
return file;
|
||||
} catch (DiskErrorException d) {
|
||||
LOG.warn(StringUtils.stringifyException(d));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current directory index.
|
||||
* @return the current directory index.
|
||||
*/
|
||||
int getCurrentDirectoryIndex() {
|
||||
return dirNumLastAccessed;
|
||||
}
|
||||
|
||||
/** Get a path from the local FS. This method should be used if the size of
|
||||
* the file is not known a priori.
|
||||
*
|
||||
* It will use roulette selection, picking directories
|
||||
* with probability proportional to their available space.
|
||||
*/
|
||||
public synchronized Path getLocalPathForWrite(String path,
|
||||
Configuration conf) throws IOException {
|
||||
return getLocalPathForWrite(path, -1, conf);
|
||||
}
|
||||
|
||||
/** Get a path from the local FS. If size is known, we go
 * round-robin over the set of disks (via the configured dirs) and return
 * the first complete path which has enough space.
 *
 * If size is not known, use roulette selection -- pick directories
 * with probability proportional to their available space.
 */
public synchronized Path getLocalPathForWrite(String pathStr, long size,
    Configuration conf) throws IOException {
  confChanged(conf);
  int numDirs = localDirs.length;
  int numDirsSearched = 0;
  //remove the leading slash from the path (to make sure that the uri
  //resolution results in a valid path on the dir being checked)
  if (pathStr.startsWith("/")) {
    pathStr = pathStr.substring(1);
  }
  Path returnPath = null;

  if(size == -1) { //do roulette selection: pick dir with probability
    //proportional to available size
    long[] availableOnDisk = new long[dirDF.length];
    long totalAvailable = 0;

    //build the "roulette wheel"
    for(int i =0; i < dirDF.length; ++i) {
      availableOnDisk[i] = dirDF[i].getAvailable();
      totalAvailable += availableOnDisk[i];
    }

    // Keep rolling the wheel till we get a valid path
    // NOTE(review): a fresh Random is constructed on every call;
    // consider reusing dirIndexRandomizer.
    Random r = new java.util.Random();
    while (numDirsSearched < numDirs && returnPath == null) {
      // NOTE(review): if every disk reports 0 available space (or all
      // candidates have been skipped), totalAvailable is 0 and this
      // modulo throws ArithmeticException — confirm upstream guards.
      long randomPosition = Math.abs(r.nextLong()) % totalAvailable;
      int dir = 0;
      // walk the wheel until the random position falls inside a slot
      while (randomPosition > availableOnDisk[dir]) {
        randomPosition -= availableOnDisk[dir];
        dir++;
      }
      dirNumLastAccessed = dir;
      returnPath = createPath(pathStr);
      if (returnPath == null) {
        // disk was not writable: remove it from the wheel and retry
        totalAvailable -= availableOnDisk[dir];
        availableOnDisk[dir] = 0; // skip this disk
        numDirsSearched++;
      }
    }
  } else {
    // known size: plain round-robin, first dir with enough space wins
    while (numDirsSearched < numDirs && returnPath == null) {
      long capacity = dirDF[dirNumLastAccessed].getAvailable();
      if (capacity > size) {
        returnPath = createPath(pathStr);
      }
      dirNumLastAccessed++;
      dirNumLastAccessed = dirNumLastAccessed % numDirs;
      numDirsSearched++;
    }
  }
  if (returnPath != null) {
    return returnPath;
  }

  //no path found
  throw new DiskErrorException("Could not find any valid local " +
      "directory for " + pathStr);
}
|
||||
|
||||
/** Creates a file on the local FS. Pass size as -1 if not known apriori. We
|
||||
* round-robin over the set of disks (via the configured dirs) and return
|
||||
* a file on the first path which has enough space. The file is guaranteed
|
||||
* to go away when the JVM exits.
|
||||
*/
|
||||
public File createTmpFileForWrite(String pathStr, long size,
|
||||
Configuration conf) throws IOException {
|
||||
|
||||
// find an appropriate directory
|
||||
Path path = getLocalPathForWrite(pathStr, size, conf);
|
||||
File dir = new File(path.getParent().toUri().getPath());
|
||||
String prefix = path.getName();
|
||||
|
||||
// create a temp file on this directory
|
||||
File result = File.createTempFile(prefix, null, dir);
|
||||
result.deleteOnExit();
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Get a path from the local FS for reading. We search through all the
|
||||
* configured dirs for the file's existence and return the complete
|
||||
* path to the file when we find one
|
||||
*/
|
||||
public synchronized Path getLocalPathToRead(String pathStr,
|
||||
Configuration conf) throws IOException {
|
||||
confChanged(conf);
|
||||
int numDirs = localDirs.length;
|
||||
int numDirsSearched = 0;
|
||||
//remove the leading slash from the path (to make sure that the uri
|
||||
//resolution results in a valid path on the dir being checked)
|
||||
if (pathStr.startsWith("/")) {
|
||||
pathStr = pathStr.substring(1);
|
||||
}
|
||||
while (numDirsSearched < numDirs) {
|
||||
Path file = new Path(localDirs[numDirsSearched], pathStr);
|
||||
if (localFS.exists(file)) {
|
||||
return file;
|
||||
}
|
||||
numDirsSearched++;
|
||||
}
|
||||
|
||||
//no path found
|
||||
throw new DiskErrorException ("Could not find " + pathStr +" in any of" +
|
||||
" the configured local directories");
|
||||
}
|
||||
|
||||
/** We search through all the configured dirs for the file's existence
|
||||
* and return true when we find one
|
||||
*/
|
||||
public synchronized boolean ifExists(String pathStr,Configuration conf) {
|
||||
try {
|
||||
int numDirs = localDirs.length;
|
||||
int numDirsSearched = 0;
|
||||
//remove the leading slash from the path (to make sure that the uri
|
||||
//resolution results in a valid path on the dir being checked)
|
||||
if (pathStr.startsWith("/")) {
|
||||
pathStr = pathStr.substring(1);
|
||||
}
|
||||
while (numDirsSearched < numDirs) {
|
||||
Path file = new Path(localDirs[numDirsSearched], pathStr);
|
||||
if (localFS.exists(file)) {
|
||||
return true;
|
||||
}
|
||||
numDirsSearched++;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// IGNORE and try again
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URI;
|
||||
import java.util.*;
|
||||
|
||||
/****************************************************************
 * Implement the FileSystem API for the checksummed local filesystem.
 *****************************************************************/
public class LocalFileSystem extends ChecksumFileSystem {
  // URI naming the local filesystem scheme.
  static final URI NAME = URI.create("file:///");
  // Shared source of suffixes used to uniquify quarantined bad files.
  static private Random rand = new Random();
  // The underlying raw (non-checksumming) filesystem.
  FileSystem rfs;

  public LocalFileSystem() {
    this(new RawLocalFileSystem());
  }

  /** Return the underlying raw filesystem (no checksum layer). */
  public FileSystem getRaw() {
    return rfs;
  }

  public LocalFileSystem(FileSystem rawLocalFileSystem) {
    super(rawLocalFileSystem);
    rfs = rawLocalFileSystem;
  }

  /** Convert a path to a File. */
  public File pathToFile(Path path) {
    // Delegates to the wrapped RawLocalFileSystem's conversion.
    return ((RawLocalFileSystem)fs).pathToFile(path);
  }

  @Override
  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    // Source and destination are both on this (local) filesystem, so this
    // is a plain local copy.
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
  }

  @Override
  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    // Same as copyFromLocalFile: both ends are the local filesystem.
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
  }

  /**
   * Moves files to a bad file directory on the same device, so that their
   * storage will not be reused.
   *
   * Always returns false (the data is not recoverable from another source).
   * NOTE(review): the inPos, sums and sumsPos parameters are unused here.
   */
  public boolean reportChecksumFailure(Path p, FSDataInputStream in,
                                       long inPos,
                                       FSDataInputStream sums, long sumsPos) {
    try {
      // canonicalize f
      File f = ((RawLocalFileSystem)fs).pathToFile(p).getCanonicalFile();

      // find highest writable parent dir of f on the same device
      String device = new DF(f, getConf()).getMount();
      File parent = f.getParentFile();
      File dir = null;
      while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) {
        dir = parent;
        parent = parent.getParentFile();
      }

      if (dir==null) {
        throw new IOException(
                              "not able to find the highest writable parent dir");
      }

      // move the file there
      File badDir = new File(dir, "bad_files");
      if (!badDir.mkdirs()) {
        // mkdirs() returning false is fine if the directory already exists.
        if (!badDir.isDirectory()) {
          throw new IOException("Mkdirs failed to create " + badDir.toString());
        }
      }
      // Random suffix keeps repeated failures on the same file from colliding.
      String suffix = "." + rand.nextInt();
      File badFile = new File(badDir, f.getName()+suffix);
      LOG.warn("Moving bad file " + f + " to " + badFile);
      in.close(); // close it first
      boolean b = f.renameTo(badFile);      // rename it
      if (!b) {
        LOG.warn("Ignoring failure of renameTo");
      }
      // move checksum file too
      File checkFile = ((RawLocalFileSystem)fs).pathToFile(getChecksumFile(p));
      b = checkFile.renameTo(new File(badDir, checkFile.getName()+suffix));
      if (!b) {
        LOG.warn("Ignoring failure of renameTo");
      }
    } catch (IOException e) {
      // Quarantining is best-effort; log and fall through.
      LOG.warn("Error moving bad file " + p + ": " + e);
    }
    return false;
  }
}
|
|
@ -0,0 +1,113 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.io.MD5Hash;
|
||||
import org.apache.hadoop.io.WritableUtils;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.znerd.xmlenc.XMLOutputter;
|
||||
|
||||
/** MD5 of MD5 of CRC32. */
|
||||
public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||
public static final int LENGTH = MD5Hash.MD5_LEN
|
||||
+ (Integer.SIZE + Long.SIZE)/Byte.SIZE;
|
||||
|
||||
private int bytesPerCRC;
|
||||
private long crcPerBlock;
|
||||
private MD5Hash md5;
|
||||
|
||||
/** Same as this(0, 0, null) */
|
||||
public MD5MD5CRC32FileChecksum() {
|
||||
this(0, 0, null);
|
||||
}
|
||||
|
||||
/** Create a MD5FileChecksum */
|
||||
public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
|
||||
this.bytesPerCRC = bytesPerCRC;
|
||||
this.crcPerBlock = crcPerBlock;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String getAlgorithmName() {
|
||||
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int getLength() {return LENGTH;}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public byte[] getBytes() {
|
||||
return WritableUtils.toByteArray(this);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
bytesPerCRC = in.readInt();
|
||||
crcPerBlock = in.readLong();
|
||||
md5 = MD5Hash.read(in);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeInt(bytesPerCRC);
|
||||
out.writeLong(crcPerBlock);
|
||||
md5.write(out);
|
||||
}
|
||||
|
||||
/** Write that object to xml output. */
|
||||
public static void write(XMLOutputter xml, MD5MD5CRC32FileChecksum that
|
||||
) throws IOException {
|
||||
xml.startTag(MD5MD5CRC32FileChecksum.class.getName());
|
||||
if (that != null) {
|
||||
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
||||
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
||||
xml.attribute("md5", "" + that.md5);
|
||||
}
|
||||
xml.endTag();
|
||||
}
|
||||
|
||||
/** Return the object represented in the attributes. */
|
||||
public static MD5MD5CRC32FileChecksum valueOf(Attributes attrs
|
||||
) throws SAXException {
|
||||
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
||||
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
||||
final String md5 = attrs.getValue("md5");
|
||||
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
|
||||
Integer.valueOf(crcPerBlock), new MD5Hash(md5));
|
||||
} catch(Exception e) {
|
||||
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
||||
+ ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String toString() {
|
||||
return getAlgorithmName() + ":" + md5;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,298 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/** Names a file or directory in a {@link FileSystem}.
 * Path strings use slash as the directory separator.  A path string is
 * absolute if it begins with a slash.
 */
public class Path implements Comparable {

  /** The directory separator, a slash. */
  public static final String SEPARATOR = "/";
  public static final char SEPARATOR_CHAR = '/';

  /** The relative-path name of the current directory, a dot. */
  public static final String CUR_DIR = ".";

  // Enables the drive-letter special cases in the parsing below.
  static final boolean WINDOWS
    = System.getProperty("os.name").startsWith("Windows");

  private URI uri;                                // a hierarchical uri

  /** Resolve a child path against a parent path. */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }

  /** Resolve a child path against a parent path. */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }

  /** Resolve a child path against a parent path. */
  public Path(Path parent, Path child) {
    // Add a slash to parent's path so resolution is compatible with URI's:
    // URI.resolve drops the final path segment unless it ends with "/".
    URI parentUri = parent.uri;
    String parentPath = parentUri.getPath();
    if (!(parentPath.equals("/") || parentPath.equals("")))
      try {
        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
                            parentUri.getPath()+"/", null, null);
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    URI resolved = parentUri.resolve(child.uri);
    initialize(resolved.getScheme(), resolved.getAuthority(),
               normalizePath(resolved.getPath()));
  }

  // Reject null/empty path strings before they reach URI parsing.
  private void checkPathArg( String path ) {
    // disallow construction of a Path from an empty string
    if ( path == null ) {
      throw new IllegalArgumentException(
          "Can not create a Path from a null string");
    }
    if( path.length() == 0 ) {
      throw new IllegalArgumentException(
          "Can not create a Path from an empty string");
    }
  }

  /** Construct a path from a String.  Path strings are URIs, but with
   * unescaped elements and some additional normalization. */
  public Path(String pathString) {
    checkPathArg( pathString );

    // We can't use 'new URI(String)' directly, since it assumes things are
    // escaped, which we don't require of Paths.

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(pathString, false))
      pathString = "/"+pathString;

    // parse uri components
    String scheme = null;
    String authority = null;

    int start = 0;

    // parse uri scheme, if any: a colon that occurs before the first slash
    int colon = pathString.indexOf(':');
    int slash = pathString.indexOf('/');
    if ((colon != -1) &&
        ((slash == -1) || (colon < slash))) {     // has a scheme
      scheme = pathString.substring(0, colon);
      start = colon+1;
    }

    // parse uri authority, if any: "//host..." up to the next slash
    if (pathString.startsWith("//", start) &&
        (pathString.length()-start > 2)) {        // has authority
      int nextSlash = pathString.indexOf('/', start+2);
      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
      authority = pathString.substring(start+2, authEnd);
      start = authEnd;
    }

    // uri path is the rest of the string -- query & fragment not supported
    String path = pathString.substring(start, pathString.length());

    initialize(scheme, authority, path);
  }

  /** Construct a Path from components. */
  public Path(String scheme, String authority, String path) {
    checkPathArg( path );
    initialize(scheme, authority, path);
  }

  // Common constructor tail: build and normalize the backing URI.
  private void initialize(String scheme, String authority, String path) {
    try {
      this.uri = new URI(scheme, authority, normalizePath(path), null, null)
        .normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  private String normalizePath(String path) {
    // remove double slashes & backslashes
    // NOTE(review): replace() makes one left-to-right pass, so an odd run
    // like "///" still leaves a "//" behind -- confirm callers never pass
    // three or more consecutive slashes.
    path = path.replace("//", "/");
    path = path.replace("\\", "/");

    // trim trailing slash from non-root path (ignoring windows drive)
    int minLength = hasWindowsDrive(path, true) ? 4 : 1;
    if (path.length() > minLength && path.endsWith("/")) {
      path = path.substring(0, path.length()-1);
    }

    return path;
  }

  // True when path begins with a drive spec like "c:" (or "/c:" when
  // slashed is set).  Always false off Windows.
  private boolean hasWindowsDrive(String path, boolean slashed) {
    if (!WINDOWS) return false;
    int start = slashed ? 1 : 0;
    return
      path.length() >= start+2 &&
      (slashed ? path.charAt(0) == '/' : true) &&
      path.charAt(start+1) == ':' &&
      ((path.charAt(start) >= 'A' && path.charAt(start) <= 'Z') ||
       (path.charAt(start) >= 'a' && path.charAt(start) <= 'z'));
  }


  /** Convert this to a URI. */
  public URI toUri() { return uri; }

  /** Return the FileSystem that owns this Path. */
  public FileSystem getFileSystem(Configuration conf) throws IOException {
    return FileSystem.get(this.toUri(), conf);
  }

  /** True if the directory of this path is absolute. */
  public boolean isAbsolute() {
    // On Windows the leading "/X:" is skipped before checking for a slash.
    int start = hasWindowsDrive(uri.getPath(), true) ? 3 : 0;
    return uri.getPath().startsWith(SEPARATOR, start);
  }

  /** Returns the final component of this path.*/
  public String getName() {
    String path = uri.getPath();
    int slash = path.lastIndexOf(SEPARATOR);
    return path.substring(slash+1);
  }

  /** Returns the parent of a path or null if at root. */
  public Path getParent() {
    String path = uri.getPath();
    int lastSlash = path.lastIndexOf('/');
    int start = hasWindowsDrive(path, true) ? 3 : 0;
    if ((path.length() == start) ||               // empty path
        (lastSlash == start && path.length() == start+1)) { // at root
      return null;
    }
    String parent;
    if (lastSlash==-1) {
      // Relative single-component path: parent is the current directory.
      parent = CUR_DIR;
    } else {
      int end = hasWindowsDrive(path, true) ? 3 : 0;
      parent = path.substring(0, lastSlash==end?end+1:lastSlash);
    }
    return new Path(uri.getScheme(), uri.getAuthority(), parent);
  }

  /** Adds a suffix to the final name in the path.*/
  public Path suffix(String suffix) {
    return new Path(getParent(), getName()+suffix);
  }

  public String toString() {
    // we can't use uri.toString(), which escapes everything, because we want
    // illegal characters unescaped in the string, for glob processing, etc.
    StringBuffer buffer = new StringBuffer();
    if (uri.getScheme() != null) {
      buffer.append(uri.getScheme());
      buffer.append(":");
    }
    if (uri.getAuthority() != null) {
      buffer.append("//");
      buffer.append(uri.getAuthority());
    }
    if (uri.getPath() != null) {
      String path = uri.getPath();
      if (path.indexOf('/')==0 &&
          hasWindowsDrive(path, true) &&          // has windows drive
          uri.getScheme() == null &&              // but no scheme
          uri.getAuthority() == null)             // or authority
        path = path.substring(1);                 // remove slash before drive
      buffer.append(path);
    }
    return buffer.toString();
  }

  // Two Paths are equal iff their backing URIs are equal.
  public boolean equals(Object o) {
    if (!(o instanceof Path)) {
      return false;
    }
    Path that = (Path)o;
    return this.uri.equals(that.uri);
  }

  public int hashCode() {
    return uri.hashCode();
  }

  // NOTE(review): raw Comparable, so a non-Path argument fails with
  // ClassCastException at the cast below.
  public int compareTo(Object o) {
    Path that = (Path)o;
    return this.uri.compareTo(that.uri);
  }

  /** Return the number of elements in this path. */
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    // The root path "/" has depth 0; start scanning before the first slash.
    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR, slash+1);
    }
    return depth;
  }

  /** Returns a qualified path object: this path made absolute and given
   *  the scheme and authority of the supplied filesystem where missing. */
  public Path makeQualified(FileSystem fs) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(fs.getWorkingDirectory(), this);
    }

    URI pathUri = path.toUri();
    URI fsUri = fs.getUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();

    // Already as qualified as the filesystem itself: nothing to add.
    if (scheme != null &&
        (authority != null || fsUri.getAuthority() == null))
      return path;

    if (scheme == null) {
      scheme = fsUri.getScheme();
    }

    if (authority == null) {
      authority = fsUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    return new Path(scheme+":"+"//"+authority + pathUri.getPath());
  }
}
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
public interface PathFilter {
  /**
   * Tests whether or not the specified abstract pathname should be
   * included in a pathname list.
   *
   * @param path The abstract pathname to be tested
   * @return <code>true</code> if and only if <code>path</code>
   *         should be included
   */
  boolean accept(Path path);
}
|
||||
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
import org.apache.hadoop.fs.*;
|
||||
|
||||
/** Stream that permits positional reading. */
public interface PositionedReadable {
  /**
   * Read up to the specified number of bytes, from a given
   * position within a file, and return the number of bytes read. This does not
   * change the current offset of a file, and is thread-safe.
   */
  public int read(long position, byte[] buffer, int offset, int length)
    throws IOException;

  /**
   * Read the specified number of bytes, from a given
   * position within a file. This does not
   * change the current offset of a file, and is thread-safe.
   */
  public void readFully(long position, byte[] buffer, int offset, int length)
    throws IOException;

  /**
   * Read a number of bytes equal to the length of the buffer, from a given
   * position within a file. This does not
   * change the current offset of a file, and is thread-safe.
   */
  public void readFully(long position, byte[] buffer) throws IOException;
}
|
|
@ -0,0 +1,496 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.DataOutput;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/****************************************************************
|
||||
* Implement the FileSystem API for the raw local filesystem.
|
||||
*
|
||||
*****************************************************************/
|
||||
public class RawLocalFileSystem extends FileSystem {
|
||||
static final URI NAME = URI.create("file:///");
|
||||
private Path workingDir;
|
||||
|
||||
public RawLocalFileSystem() {
  // Default working directory: the JVM's current dir, qualified against
  // this filesystem so relative Paths resolve deterministically.
  workingDir = new Path(System.getProperty("user.dir")).makeQualified(this);
}
|
||||
|
||||
/** Convert a path to a File. */
public File pathToFile(Path path) {
  checkPath(path);
  // Relative paths are resolved against the working directory first.
  if (!path.isAbsolute()) {
    path = new Path(getWorkingDirectory(), path);
  }
  return new File(path.toUri().getPath());
}
|
||||
|
||||
/** Return the file:/// URI naming this filesystem. */
public URI getUri() { return NAME; }

public void initialize(URI uri, Configuration conf) throws IOException {
  super.initialize(uri, conf);
  // Retain the configuration so later getConf() calls see it.
  setConf(conf);
}
|
||||
|
||||
/**
 * A FileInputStream that folds every byte actually read into the
 * enclosing filesystem's read statistics.
 */
class TrackingFileInputStream extends FileInputStream {
  public TrackingFileInputStream(File f) throws IOException {
    super(f);
  }

  public int read() throws IOException {
    int result = super.read();
    if (result != -1) {
      // A single byte was consumed (result is the byte value, not a count).
      statistics.incrementBytesRead(1);
    }
    return result;
  }

  public int read(byte[] data) throws IOException {
    int result = super.read(data);
    if (result != -1) {
      statistics.incrementBytesRead(result);
    }
    return result;
  }

  public int read(byte[] data, int offset, int length) throws IOException {
    int result = super.read(data, offset, length);
    if (result != -1) {
      statistics.incrementBytesRead(result);
    }
    return result;
  }
}
|
||||
|
||||
/*******************************************************
 * For open()'s FSInputStream
 *******************************************************/
class LocalFSFileInputStream extends FSInputStream {
  FileInputStream fis;
  // Current read offset, tracked manually so getPos() needs no syscall.
  private long position;

  public LocalFSFileInputStream(Path f) throws IOException {
    this.fis = new TrackingFileInputStream(pathToFile(f));
  }

  public void seek(long pos) throws IOException {
    fis.getChannel().position(pos);
    this.position = pos;
  }

  public long getPos() throws IOException {
    return this.position;
  }

  public boolean seekToNewSource(long targetPos) throws IOException {
    // A local file has no alternate replica to fail over to.
    return false;
  }

  /*
   * Just forward to the fis
   */
  public int available() throws IOException { return fis.available(); }
  public void close() throws IOException { fis.close(); }
  // NOTE(review): looks intended to override InputStream.markSupported(),
  // but the name differs ("markSupport") so it overrides nothing.  Behavior
  // is unaffected only because InputStream.markSupported() already returns
  // false by default.
  public boolean markSupport() { return false; }

  public int read() throws IOException {
    try {
      int value = fis.read();
      if (value >= 0) {
        this.position++;
      }
      return value;
    } catch (IOException e) {                 // unexpected exception
      throw new FSError(e);                   // assume native fs error
    }
  }

  public int read(byte[] b, int off, int len) throws IOException {
    try {
      int value = fis.read(b, off, len);
      if (value > 0) {
        this.position += value;
      }
      return value;
    } catch (IOException e) {                 // unexpected exception
      throw new FSError(e);                   // assume native fs error
    }
  }

  public int read(long position, byte[] b, int off, int len)
    throws IOException {
    // Positional read goes straight through the channel: it does not move
    // this.position, and NOTE(review) it also bypasses the byte-count
    // tracking done in TrackingFileInputStream.read().
    ByteBuffer bb = ByteBuffer.wrap(b, off, len);
    try {
      return fis.getChannel().read(bb, position);
    } catch (IOException e) {
      throw new FSError(e);
    }
  }

  public long skip(long n) throws IOException {
    long value = fis.skip(n);
    if (value > 0) {
      this.position += value;
    }
    return value;
  }
}
|
||||
|
||||
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
  // Check existence up front so a missing file surfaces as
  // FileNotFoundException carrying the Hadoop path.  NOTE(review): the
  // check-then-open sequence is not atomic.
  if (!exists(f)) {
    throw new FileNotFoundException(f.toString());
  }
  return new FSDataInputStream(new BufferedFSInputStream(
      new LocalFSFileInputStream(f), bufferSize));
}
|
||||
|
||||
/*********************************************************
 * For create()'s FSOutputStream.
 *********************************************************/
class LocalFSFileOutputStream extends OutputStream implements Syncable {
  FileOutputStream fos;

  // append=true opens the file for appending; false truncates/creates.
  private LocalFSFileOutputStream(Path f, boolean append) throws IOException {
    this.fos = new FileOutputStream(pathToFile(f), append);
  }

  /*
   * Just forward to the fos
   */
  public void close() throws IOException { fos.close(); }
  public void flush() throws IOException { fos.flush(); }
  public void write(byte[] b, int off, int len) throws IOException {
    try {
      fos.write(b, off, len);
    } catch (IOException e) {               // unexpected exception
      throw new FSError(e);                 // assume native fs error
    }
  }

  public void write(int b) throws IOException {
    try {
      fos.write(b);
    } catch (IOException e) {               // unexpected exception
      throw new FSError(e);                 // assume native fs error
    }
  }

  /** {@inheritDoc} */
  public void sync() throws IOException {
    // Force buffered data through to the storage device (fsync).
    fos.getFD().sync();
  }
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public FSDataOutputStream append(Path f, int bufferSize,
|
||||
Progressable progress) throws IOException {
|
||||
if (!exists(f)) {
|
||||
throw new FileNotFoundException("File " + f + " not found.");
|
||||
}
|
||||
if (getFileStatus(f).isDir()) {
|
||||
throw new IOException("Cannot append to a diretory (=" + f + " ).");
|
||||
}
|
||||
return new FSDataOutputStream(new BufferedOutputStream(
|
||||
new LocalFSFileOutputStream(f, true), bufferSize), statistics);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress)
|
||||
throws IOException {
|
||||
if (exists(f) && !overwrite) {
|
||||
throw new IOException("File already exists:"+f);
|
||||
}
|
||||
Path parent = f.getParent();
|
||||
if (parent != null && !mkdirs(parent)) {
|
||||
throw new IOException("Mkdirs failed to create " + parent.toString());
|
||||
}
|
||||
return new FSDataOutputStream(new BufferedOutputStream(
|
||||
new LocalFSFileOutputStream(f, false), bufferSize), statistics);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public FSDataOutputStream create(Path f, FsPermission permission,
|
||||
boolean overwrite, int bufferSize, short replication, long blockSize,
|
||||
Progressable progress) throws IOException {
|
||||
FSDataOutputStream out = create(f,
|
||||
overwrite, bufferSize, replication, blockSize, progress);
|
||||
setPermission(f, permission);
|
||||
return out;
|
||||
}
|
||||
|
||||
public boolean rename(Path src, Path dst) throws IOException {
|
||||
if (pathToFile(src).renameTo(pathToFile(dst))) {
|
||||
return true;
|
||||
}
|
||||
return FileUtil.copy(this, src, this, dst, true, getConf());
|
||||
}
|
||||
|
||||
public boolean delete(Path p, boolean recursive) throws IOException {
|
||||
File f = pathToFile(p);
|
||||
if (f.isFile()) {
|
||||
return f.delete();
|
||||
} else if ((!recursive) && f.isDirectory() &&
|
||||
(f.listFiles().length != 0)) {
|
||||
throw new IOException("Directory " + f.toString() + " is not empty");
|
||||
}
|
||||
return FileUtil.fullyDelete(f);
|
||||
}
|
||||
|
||||
public FileStatus[] listStatus(Path f) throws IOException {
|
||||
File localf = pathToFile(f);
|
||||
FileStatus[] results;
|
||||
|
||||
if (!localf.exists()) {
|
||||
return null;
|
||||
}
|
||||
if (localf.isFile()) {
|
||||
return new FileStatus[] {
|
||||
new RawLocalFileStatus(localf, getDefaultBlockSize(), this) };
|
||||
}
|
||||
|
||||
String[] names = localf.list();
|
||||
if (names == null) {
|
||||
return null;
|
||||
}
|
||||
results = new FileStatus[names.length];
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
results[i] = getFileStatus(new Path(f, names[i]));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the specified directory hierarchy. Does not
|
||||
* treat existence as an error.
|
||||
*/
|
||||
public boolean mkdirs(Path f) throws IOException {
|
||||
Path parent = f.getParent();
|
||||
File p2f = pathToFile(f);
|
||||
return (parent == null || mkdirs(parent)) &&
|
||||
(p2f.mkdir() || p2f.isDirectory());
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public boolean mkdirs(Path f, FsPermission permission) throws IOException {
|
||||
boolean b = mkdirs(f);
|
||||
setPermission(f, permission);
|
||||
return b;
|
||||
}
|
||||
|
||||
  @Override
  public Path getHomeDirectory() {
    // Home on the local filesystem is the JVM user's home directory,
    // qualified with this filesystem's scheme and authority.
    return new Path(System.getProperty("user.home")).makeQualified(this);
  }
|
||||
|
||||
  /**
   * Set the working directory to the given directory.
   * No validation is performed; the path is stored as given.
   */
  @Override
  public void setWorkingDirectory(Path newDir) {
    workingDir = newDir;
  }
|
||||
|
||||
  /** Returns the current working directory of this filesystem. */
  @Override
  public Path getWorkingDirectory() {
    return workingDir;
  }
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public FsStatus getStatus(Path p) throws IOException {
|
||||
File partition = pathToFile(p == null ? new Path("/") : p);
|
||||
//File provides getUsableSpace() and getFreeSpace()
|
||||
//File provides no API to obtain used space, assume used = total - free
|
||||
return new FsStatus(partition.getTotalSpace(),
|
||||
partition.getTotalSpace() - partition.getFreeSpace(),
|
||||
partition.getFreeSpace());
|
||||
}
|
||||
|
||||
  // In the case of the local filesystem, we can just rename the file.
  public void moveFromLocalFile(Path src, Path dst) throws IOException {
    rename(src, dst);
  }
|
||||
|
||||
  // We can write output directly to the final location, so the temporary
  // local file is never used here.
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    return fsOutputFile;
  }
|
||||
|
||||
  // It's in the right place - nothing to do. startLocalOutput() already
  // returned the final destination, so there is no temp file to promote.
  public void completeLocalOutput(Path fsWorkingFile, Path tmpLocalFile)
    throws IOException {
  }
|
||||
|
||||
  /** Closes this filesystem; delegates entirely to the superclass. */
  public void close() throws IOException {
    super.close();
  }
|
||||
|
||||
  /** Short human-readable identifier used in logs and messages. */
  public String toString() {
    return "LocalFS";
  }
|
||||
|
||||
public FileStatus getFileStatus(Path f) throws IOException {
|
||||
File path = pathToFile(f);
|
||||
if (path.exists()) {
|
||||
return new RawLocalFileStatus(pathToFile(f), getDefaultBlockSize(), this);
|
||||
} else {
|
||||
throw new FileNotFoundException( "File " + f + " does not exist.");
|
||||
}
|
||||
}
|
||||
|
||||
  /**
   * A FileStatus for local files that lazily loads permission, owner and
   * group by shelling out to the platform's permission command the first
   * time one of those fields is requested.
   */
  static class RawLocalFileStatus extends FileStatus {
    /* We can add extra fields here. It breaks at least CopyFiles.FilePair().
     * We recognize if the information is already loaded by check if
     * owner.equals("").
     */
    // An empty owner string marks the not-yet-loaded state.
    private boolean isPermissionLoaded() {
      return !super.getOwner().equals("");
    }

    RawLocalFileStatus(File f, long defaultBlockSize, FileSystem fs) {
      // Local files report replication 1 and the filesystem's default
      // block size; permission/owner/group are left for lazy loading.
      super(f.length(), f.isDirectory(), 1, defaultBlockSize,
            f.lastModified(), new Path(f.getPath()).makeQualified(fs));
    }

    @Override
    public FsPermission getPermission() {
      if (!isPermissionLoaded()) {
        loadPermissionInfo();
      }
      return super.getPermission();
    }

    @Override
    public String getOwner() {
      if (!isPermissionLoaded()) {
        loadPermissionInfo();
      }
      return super.getOwner();
    }

    @Override
    public String getGroup() {
      if (!isPermissionLoaded()) {
        loadPermissionInfo();
      }
      return super.getGroup();
    }

    /// loads permissions, owner, and group from `ls -ld`
    private void loadPermissionInfo() {
      IOException e = null;
      try {
        StringTokenizer t = new StringTokenizer(
            execCommand(new File(getPath().toUri()),
                        Shell.getGET_PERMISSION_COMMAND()));
        //expected format
        //-rw------- 1 username groupname ...
        String permission = t.nextToken();
        if (permission.length() > 10) { //files with ACLs might have a '+'
          permission = permission.substring(0, 10);
        }
        setPermission(FsPermission.valueOf(permission));
        t.nextToken(); // skip the link-count column
        setOwner(t.nextToken());
        setGroup(t.nextToken());
      } catch (Shell.ExitCodeException ioe) {
        if (ioe.getExitCode() != 1) {
          e = ioe;
        } else {
          // Exit code 1: ls could not stat the file (e.g. removed
          // concurrently); clear the fields instead of failing.
          setPermission(null);
          setOwner(null);
          setGroup(null);
        }
      } catch (IOException ioe) {
        e = ioe;
      } finally {
        if (e != null) {
          // Any other failure is unexpected and fatal for this status.
          throw new RuntimeException("Error while running command to get " +
                                     "file permissions : " +
                                     StringUtils.stringifyException(e));
        }
      }
    }

    @Override
    public void write(DataOutput out) throws IOException {
      // Force-load permission info so the serialized form is complete.
      if (!isPermissionLoaded()) {
        loadPermissionInfo();
      }
      super.write(out);
    }
  }
|
||||
|
||||
/**
|
||||
* Use the command chown to set owner.
|
||||
*/
|
||||
@Override
|
||||
public void setOwner(Path p, String username, String groupname
|
||||
) throws IOException {
|
||||
if (username == null && groupname == null) {
|
||||
throw new IOException("username == null && groupname == null");
|
||||
}
|
||||
|
||||
if (username == null) {
|
||||
execCommand(pathToFile(p), Shell.SET_GROUP_COMMAND, groupname);
|
||||
} else {
|
||||
//OWNER[:[GROUP]]
|
||||
String s = username + (groupname == null? "": ":" + groupname);
|
||||
execCommand(pathToFile(p), Shell.SET_OWNER_COMMAND, s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the command chmod to set permission.
|
||||
*/
|
||||
@Override
|
||||
public void setPermission(Path p, FsPermission permission
|
||||
) throws IOException {
|
||||
execCommand(pathToFile(p), Shell.SET_PERMISSION_COMMAND,
|
||||
String.format("%05o", permission.toShort()));
|
||||
}
|
||||
|
||||
private static String execCommand(File f, String... cmd) throws IOException {
|
||||
String[] args = new String[cmd.length + 1];
|
||||
System.arraycopy(cmd, 0, args, 0, cmd.length);
|
||||
args[cmd.length] = f.getCanonicalPath();
|
||||
String output = Shell.execCommand(args);
|
||||
return output;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** Stream that permits seeking. */
public interface Seekable {
  /**
   * Seek to the given offset from the start of the file.
   * The next read() will be from that location.  Can't
   * seek past the end of the file.
   */
  void seek(long pos) throws IOException;

  /**
   * Return the current offset from the start of the file
   */
  long getPos() throws IOException;

  /**
   * Seeks a different copy of the data.  Returns true if
   * found a new source, false otherwise.
   */
  boolean seekToNewSource(long targetPos) throws IOException;
}
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** This interface declares the sync() operation for streams that can
 * flush their buffered data down to the underlying device. */
public interface Syncable {
  /**
   * Synchronize all buffer with the underlying devices.
   * @throws IOException if the flush to the device fails
   */
  public void sync() throws IOException;
}
|
|
@ -0,0 +1,291 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import java.text.*;
|
||||
import java.io.*;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.commons.logging.*;
|
||||
|
||||
import org.apache.hadoop.conf.*;
|
||||
import org.apache.hadoop.fs.permission.*;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/** Provides a <i>trash</i> feature.  Files are moved to a user's trash
 * directory, a subdirectory of their home directory named ".Trash".  Files are
 * initially moved to a <i>current</i> sub-directory of the trash directory.
 * Within that sub-directory their original path is preserved.  Periodically
 * one may checkpoint the current trash and remove older checkpoints.  (This
 * design permits trash management without enumeration of the full trash
 * content, without date support in the filesystem, and without clock
 * synchronization.)
 */
public class Trash extends Configured {
  private static final Log LOG =
    LogFactory.getLog(Trash.class);

  // Name of the live trash sub-directory and of the trash root itself.
  private static final Path CURRENT = new Path("Current");
  private static final Path TRASH = new Path(".Trash/");
  // Root under which per-user home directories are scanned by the Emptier.
  private static final Path HOMES = new Path("/user/");

  // Trash directories are private to the owning user.
  private static final FsPermission PERMISSION =
    new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);

  // Checkpoint directory names encode their creation time. Access is
  // synchronized on this object because the format()/parse() calls below
  // always hold its lock.
  private static final DateFormat CHECKPOINT = new SimpleDateFormat("yyMMddHHmm");
  private static final int MSECS_PER_MINUTE = 60*1000;

  private final FileSystem fs;       // filesystem holding the trash
  private final Path trash;          // the ".Trash" root for this user
  private final Path current;        // ".Trash/Current"
  private final long interval;       // checkpoint lifetime in msec; 0 = off

  /** Construct a trash can accessor.
   * @param conf a Configuration
   */
  public Trash(Configuration conf) throws IOException {
    this(FileSystem.get(conf), conf);
  }

  /**
   * Construct a trash can accessor for the FileSystem provided.
   */
  public Trash(FileSystem fs, Configuration conf) throws IOException {
    super(conf);
    this.fs = fs;
    this.trash = new Path(fs.getHomeDirectory(), TRASH);
    this.current = new Path(trash, CURRENT);
    this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE;
  }

  // Accessor rooted at an explicit home directory; used by the Emptier
  // when it walks every user's home under HOMES.
  private Trash(Path home, Configuration conf) throws IOException {
    super(conf);
    this.fs = home.getFileSystem(conf);
    this.trash = new Path(home, TRASH);
    this.current = new Path(trash, CURRENT);
    this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE;
  }

  // Mirrors rmFilePath's absolute path underneath basePath so the
  // original location is preserved inside the trash.
  private Path makeTrashRelativePath(Path basePath, Path rmFilePath) {
    return new Path(basePath + rmFilePath.toUri().getPath());
  }

  /** Move a file or directory to the current trash directory.
   * @return false if the item is already in the trash or trash is disabled
   */
  public boolean moveToTrash(Path path) throws IOException {
    if (interval == 0)
      return false;

    if (!path.isAbsolute())                       // make path absolute
      path = new Path(fs.getWorkingDirectory(), path);

    if (!fs.exists(path))                         // check that path exists
      throw new FileNotFoundException(path.toString());

    String qpath = path.makeQualified(fs).toString();

    if (qpath.startsWith(trash.toString())) {
      return false;                               // already in trash
    }

    // Refuse to trash an ancestor of the trash directory itself.
    if (trash.getParent().toString().startsWith(qpath)) {
      throw new IOException("Cannot move \"" + path +
                            "\" to the trash, as it contains the trash");
    }

    Path trashPath = makeTrashRelativePath(current, path);
    Path baseTrashPath = makeTrashRelativePath(current, path.getParent());

    IOException cause = null;

    // try twice, in case checkpoint between the mkdirs() & rename()
    for (int i = 0; i < 2; i++) {
      try {
        if (!fs.mkdirs(baseTrashPath, PERMISSION)) {      // create current
          LOG.warn("Can't create trash directory: "+baseTrashPath);
          return false;
        }
      } catch (IOException e) {
        LOG.warn("Can't create trash directory: "+baseTrashPath);
        return false;
      }
      try {
        //
        // if the target path in Trash already exists, then append with
        // a number. Start from 1.
        //
        String orig = trashPath.toString();
        for (int j = 1; fs.exists(trashPath); j++) {
          trashPath = new Path(orig + "." + j);
        }
        if (fs.rename(path, trashPath))           // move to current trash
          return true;
      } catch (IOException e) {
        cause = e;
      }
    }
    throw (IOException)
      new IOException("Failed to move to trash: "+path).initCause(cause);
  }

  /** Create a trash checkpoint. */
  public void checkpoint() throws IOException {
    if (!fs.exists(current))                     // no trash, no checkpoint
      return;

    Path checkpoint;
    synchronized (CHECKPOINT) {
      // DateFormat instances are not safe for unsynchronized shared use.
      checkpoint = new Path(trash, CHECKPOINT.format(new Date()));
    }

    // Atomically retire "Current" into a timestamped checkpoint.
    if (fs.rename(current, checkpoint)) {
      LOG.info("Created trash checkpoint: "+checkpoint.toUri().getPath());
    } else {
      throw new IOException("Failed to checkpoint trash: "+checkpoint);
    }
  }

  /** Delete old checkpoints. */
  public void expunge() throws IOException {
    FileStatus[] dirs = fs.listStatus(trash);    // scan trash sub-directories
    if( dirs == null){
      return;
    }
    long now = System.currentTimeMillis();
    for (int i = 0; i < dirs.length; i++) {
      Path path = dirs[i].getPath();
      String dir = path.toUri().getPath();
      String name = path.getName();
      if (name.equals(CURRENT.getName()))        // skip current
        continue;

      long time;
      try {
        synchronized (CHECKPOINT) {
          // Recover the checkpoint's creation time from its name.
          time = CHECKPOINT.parse(name).getTime();
        }
      } catch (ParseException e) {
        LOG.warn("Unexpected item in trash: "+dir+". Ignoring.");
        continue;
      }

      // Delete checkpoints older than the configured interval.
      if ((now - interval) > time) {
        if (fs.delete(path, true)) {
          LOG.info("Deleted trash checkpoint: "+dir);
        } else {
          LOG.warn("Couldn't delete checkpoint: "+dir+" Ignoring.");
        }
      }
    }
  }

  //
  // get the current working directory
  //
  Path getCurrentTrashDir() {
    return current;
  }

  /** Return a {@link Runnable} that periodically empties the trash of all
   * users, intended to be run by the superuser.  Only one checkpoint is kept
   * at a time.
   */
  public Runnable getEmptier() throws IOException {
    return new Emptier(getConf());
  }

  // Background loop that wakes every interval, then checkpoints and
  // expunges the trash of every home directory under HOMES.
  private class Emptier implements Runnable {

    private Configuration conf;
    private long interval;   // note: defaults to 0 (disabled) here,
                             // unlike the enclosing class's default of 60

    Emptier(Configuration conf) throws IOException {
      this.conf = conf;
      this.interval = conf.getLong("fs.trash.interval", 0) * MSECS_PER_MINUTE;
    }

    public void run() {
      if (interval == 0)
        return;                                  // trash disabled

      long now = System.currentTimeMillis();
      long end;
      while (true) {
        // Align wake-ups to interval boundaries rather than drifting.
        end = ceiling(now, interval);
        try {                                    // sleep for interval
          Thread.sleep(end - now);
        } catch (InterruptedException e) {
          break;                                 // exit on interrupt
        }

        try {
          now = System.currentTimeMillis();
          if (now >= end) {

            FileStatus[] homes = null;
            try {
              homes = fs.listStatus(HOMES);      // list all home dirs
            } catch (IOException e) {
              LOG.warn("Trash can't list homes: "+e+" Sleeping.");
              continue;
            }

            if (homes == null)
              continue;

            for (FileStatus home : homes) {      // dump each trash
              if (!home.isDir())
                continue;
              try {
                Trash trash = new Trash(home.getPath(), conf);
                trash.expunge();
                trash.checkpoint();
              } catch (IOException e) {
                // Best-effort per user: one bad home must not stop the rest.
                LOG.warn("Trash caught: "+e+". Skipping "+home.getPath()+".");
              }
            }
          }
        } catch (Exception e) {
          LOG.warn("RuntimeException during Trash.Emptier.run() " +
                   StringUtils.stringifyException(e));
        }
      }
      try {
        fs.close();
      } catch(IOException e) {
        LOG.warn("Trash cannot close FileSystem. " +
                 StringUtils.stringifyException(e));
      }
    }

    // Next interval boundary at or after `time`.
    private long ceiling(long time, long interval) {
      return floor(time, interval) + interval;
    }
    // Largest interval boundary not exceeding `time`.
    private long floor(long time, long interval) {
      return (time / interval) * interval;
    }

  }

  /** Run an emptier.*/
  public static void main(String[] args) throws Exception {
    new Trash(new Configuration()).getEmptier().run();
  }

}
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.ftp;
|
||||
|
||||
/**
 * A class to wrap a {@link Throwable} into a Runtime Exception.
 * Used by the FTP filesystem to surface failures from contexts where a
 * checked exception cannot be thrown.
 */
public class FTPException extends RuntimeException {

  private static final long serialVersionUID = 1L;

  public FTPException(String message) {
    super(message);
  }

  public FTPException(Throwable t) {
    super(t);
  }

  public FTPException(String message, Throwable t) {
    super(message, t);
  }
}
|
|
@ -0,0 +1,576 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.ftp;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URI;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.net.ftp.FTP;
|
||||
import org.apache.commons.net.ftp.FTPClient;
|
||||
import org.apache.commons.net.ftp.FTPFile;
|
||||
import org.apache.commons.net.ftp.FTPReply;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsAction;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link FileSystem} backed by an FTP client provided by <a
|
||||
* href="http://commons.apache.org/net/">Apache Commons Net</a>.
|
||||
* </p>
|
||||
*/
|
||||
public class FTPFileSystem extends FileSystem {
|
||||
|
||||
public static final Log LOG = LogFactory
|
||||
.getLog(FTPFileSystem.class);
|
||||
|
||||
public static final int DEFAULT_BUFFER_SIZE = 1024 * 1024;
|
||||
|
||||
public static final int DEFAULT_BLOCK_SIZE = 4 * 1024;
|
||||
|
||||
private URI uri;
|
||||
|
||||
@Override
|
||||
public void initialize(URI uri, Configuration conf) throws IOException { // get
|
||||
super.initialize(uri, conf);
|
||||
// get host information from uri (overrides info in conf)
|
||||
String host = uri.getHost();
|
||||
host = (host == null) ? conf.get("fs.ftp.host", null) : host;
|
||||
if (host == null) {
|
||||
throw new IOException("Invalid host specified");
|
||||
}
|
||||
conf.set("fs.ftp.host", host);
|
||||
|
||||
// get port information from uri, (overrides info in conf)
|
||||
int port = uri.getPort();
|
||||
port = (port == -1) ? FTP.DEFAULT_PORT : port;
|
||||
conf.setInt("fs.ftp.host.port", port);
|
||||
|
||||
// get user/password information from URI (overrides info in conf)
|
||||
String userAndPassword = uri.getUserInfo();
|
||||
if (userAndPassword == null) {
|
||||
userAndPassword = (conf.get("fs.ftp.user." + host, null) + ":" + conf
|
||||
.get("fs.ftp.password." + host, null));
|
||||
if (userAndPassword == null) {
|
||||
throw new IOException("Invalid user/passsword specified");
|
||||
}
|
||||
}
|
||||
String[] userPasswdInfo = userAndPassword.split(":");
|
||||
conf.set("fs.ftp.user." + host, userPasswdInfo[0]);
|
||||
if (userPasswdInfo.length > 1) {
|
||||
conf.set("fs.ftp.password." + host, userPasswdInfo[1]);
|
||||
} else {
|
||||
conf.set("fs.ftp.password." + host, null);
|
||||
}
|
||||
setConf(conf);
|
||||
this.uri = uri;
|
||||
}
|
||||
|
||||
  /**
   * Connect to the FTP server using configuration parameters *
   *
   * Reads host/port/user/password from the Configuration populated by
   * initialize(), then logs in and switches the session to block-mode
   * binary transfers.
   * @return An FTPClient instance
   * @throws IOException if the connection is refused or login fails
   */
  private FTPClient connect() throws IOException {
    FTPClient client = null;
    Configuration conf = getConf();
    String host = conf.get("fs.ftp.host");
    int port = conf.getInt("fs.ftp.host.port", FTP.DEFAULT_PORT);
    String user = conf.get("fs.ftp.user." + host);
    String password = conf.get("fs.ftp.password." + host);
    client = new FTPClient();
    client.connect(host, port);
    // The reply code tells us whether the server accepted the connection.
    int reply = client.getReplyCode();
    if (!FTPReply.isPositiveCompletion(reply)) {
      throw new IOException("Server - " + host
          + " refused connection on port - " + port);
    } else if (client.login(user, password)) {
      client.setFileTransferMode(FTP.BLOCK_TRANSFER_MODE);
      client.setFileType(FTP.BINARY_FILE_TYPE);
      client.setBufferSize(DEFAULT_BUFFER_SIZE);
    } else {
      throw new IOException("Login failed on server - " + host + ", port - "
          + port);
    }

    return client;
  }
|
||||
|
||||
  /**
   * Logout and disconnect the given FTPClient. *
   *
   * A failed logout is only logged (the connection is still torn down);
   * calling this on a client that is not connected is a programming error
   * and raises FTPException.
   * @param client the client to shut down; a null client is a no-op
   * @throws IOException if the logout or disconnect itself fails
   */
  private void disconnect(FTPClient client) throws IOException {
    if (client != null) {
      if (!client.isConnected()) {
        throw new FTPException("Client not connected");
      }
      boolean logoutSuccess = client.logout();
      client.disconnect();
      if (!logoutSuccess) {
        LOG.warn("Logout failed while disconnecting, error code - "
            + client.getReplyCode());
      }
    }
  }
|
||||
|
||||
/**
|
||||
* Resolve against given working directory. *
|
||||
*
|
||||
* @param workDir
|
||||
* @param path
|
||||
* @return
|
||||
*/
|
||||
private Path makeAbsolute(Path workDir, Path path) {
|
||||
if (path.isAbsolute()) {
|
||||
return path;
|
||||
}
|
||||
return new Path(workDir, path);
|
||||
}
|
||||
|
||||
  /**
   * Opens {@code file} for reading over a dedicated FTP connection. The
   * connection stays open for the life of the returned stream and is
   * closed when the stream is closed.
   */
  @Override
  public FSDataInputStream open(Path file, int bufferSize) throws IOException {
    FTPClient client = connect();
    Path workDir = new Path(client.printWorkingDirectory());
    Path absolute = makeAbsolute(workDir, file);
    FileStatus fileStat = getFileStatus(client, absolute);
    if (fileStat.isDir()) {
      disconnect(client);
      throw new IOException("Path " + file + " is a directory.");
    }
    client.allocate(bufferSize);
    Path parent = absolute.getParent();
    // Change to parent directory on the server. Only then can we read the
    // file on the server by opening up an InputStream. As a side effect the
    // working directory on the server is changed to the parent directory of
    // the file. The FTP client connection is closed when close() is called
    // on the FSDataInputStream.
    client.changeWorkingDirectory(parent.toUri().getPath());
    InputStream is = client.retrieveFileStream(file.getName());
    FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is,
        client, statistics));
    if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
      // The ftpClient is an inconsistent state. Must close the stream
      // which in turn will logout and disconnect from FTP server
      fis.close();
      throw new IOException("Unable to open file: " + file + ", Aborting");
    }
    return fis;
  }
|
||||
|
||||
  /**
   * A stream obtained via this call must be closed before using other APIs of
   * this class or else the invocation will block.
   *
   * Creates {@code file} over a dedicated FTP connection; the connection
   * is held by the returned stream and released by its close(), which
   * also verifies the transfer completed.
   */
  @Override
  public FSDataOutputStream create(Path file, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    final FTPClient client = connect();
    Path workDir = new Path(client.printWorkingDirectory());
    Path absolute = makeAbsolute(workDir, file);
    if (exists(client, file)) {
      if (overwrite) {
        delete(client, file);
      } else {
        disconnect(client);
        throw new IOException("File already exists: " + file);
      }
    }
    Path parent = absolute.getParent();
    if (parent == null || !mkdirs(client, parent, FsPermission.getDefault())) {
      parent = (parent == null) ? new Path("/") : parent;
      disconnect(client);
      throw new IOException("create(): Mkdirs failed to create: " + parent);
    }
    client.allocate(bufferSize);
    // Change to parent directory on the server. Only then can we write to the
    // file on the server by opening up an OutputStream. As a side effect the
    // working directory on the server is changed to the parent directory of the
    // file. The FTP client connection is closed when close() is called on the
    // FSDataOutputStream.
    client.changeWorkingDirectory(parent.toUri().getPath());
    FSDataOutputStream fos = new FSDataOutputStream(client.storeFileStream(file
        .getName()), statistics) {
      @Override
      public void close() throws IOException {
        super.close();
        if (!client.isConnected()) {
          throw new FTPException("Client not connected");
        }
        // completePendingCommand() confirms the STOR finished before the
        // per-stream connection is released.
        boolean cmdCompleted = client.completePendingCommand();
        disconnect(client);
        if (!cmdCompleted) {
          throw new FTPException("Could not complete transfer, Reply Code - "
              + client.getReplyCode());
        }
      }
    };
    if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
      // The ftpClient is an inconsistent state. Must close the stream
      // which in turn will logout and disconnect from FTP server
      fos.close();
      throw new IOException("Unable to create file: " + file + ", Aborting");
    }
    return fos;
  }
||||
|
||||
/** This optional operation is not yet supported. */
|
||||
public FSDataOutputStream append(Path f, int bufferSize,
|
||||
Progressable progress) throws IOException {
|
||||
throw new IOException("Not supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private boolean exists(FTPClient client, Path file) {
|
||||
try {
|
||||
return getFileStatus(client, file) != null;
|
||||
} catch (FileNotFoundException fnfe) {
|
||||
return false;
|
||||
} catch (IOException ioe) {
|
||||
throw new FTPException("Failed to get file status", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean delete(Path file, boolean recursive) throws IOException {
|
||||
FTPClient client = connect();
|
||||
try {
|
||||
boolean success = delete(client, file, recursive);
|
||||
return success;
|
||||
} finally {
|
||||
disconnect(client);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Non-recursive delete on an already open connection.
 *
 * @deprecated Use delete(Path, boolean) instead
 */
@Deprecated
private boolean delete(FTPClient client, Path file) throws IOException {
  return delete(client, file, false);
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private boolean delete(FTPClient client, Path file, boolean recursive)
|
||||
throws IOException {
|
||||
Path workDir = new Path(client.printWorkingDirectory());
|
||||
Path absolute = makeAbsolute(workDir, file);
|
||||
String pathName = absolute.toUri().getPath();
|
||||
FileStatus fileStat = getFileStatus(client, absolute);
|
||||
if (!fileStat.isDir()) {
|
||||
return client.deleteFile(pathName);
|
||||
}
|
||||
FileStatus[] dirEntries = listStatus(client, absolute);
|
||||
if (dirEntries != null && dirEntries.length > 0 && !(recursive)) {
|
||||
throw new IOException("Directory: " + file + " is not empty.");
|
||||
}
|
||||
if (dirEntries != null) {
|
||||
for (int i = 0; i < dirEntries.length; i++) {
|
||||
delete(client, new Path(absolute, dirEntries[i].getPath()), recursive);
|
||||
}
|
||||
}
|
||||
return client.removeDirectory(pathName);
|
||||
}
|
||||
|
||||
private FsAction getFsAction(int accessGroup, FTPFile ftpFile) {
|
||||
FsAction action = FsAction.NONE;
|
||||
if (ftpFile.hasPermission(accessGroup, FTPFile.READ_PERMISSION)) {
|
||||
action.or(FsAction.READ);
|
||||
}
|
||||
if (ftpFile.hasPermission(accessGroup, FTPFile.WRITE_PERMISSION)) {
|
||||
action.or(FsAction.WRITE);
|
||||
}
|
||||
if (ftpFile.hasPermission(accessGroup, FTPFile.EXECUTE_PERMISSION)) {
|
||||
action.or(FsAction.EXECUTE);
|
||||
}
|
||||
return action;
|
||||
}
|
||||
|
||||
private FsPermission getPermissions(FTPFile ftpFile) {
|
||||
FsAction user, group, others;
|
||||
user = getFsAction(FTPFile.USER_ACCESS, ftpFile);
|
||||
group = getFsAction(FTPFile.GROUP_ACCESS, ftpFile);
|
||||
others = getFsAction(FTPFile.WORLD_ACCESS, ftpFile);
|
||||
return new FsPermission(user, group, others);
|
||||
}
|
||||
|
||||
/** @return the URI this file system instance was initialized with. */
@Override
public URI getUri() {
  return uri;
}
|
||||
|
||||
@Override
|
||||
public FileStatus[] listStatus(Path file) throws IOException {
|
||||
FTPClient client = connect();
|
||||
try {
|
||||
FileStatus[] stats = listStatus(client, file);
|
||||
return stats;
|
||||
} finally {
|
||||
disconnect(client);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private FileStatus[] listStatus(FTPClient client, Path file)
|
||||
throws IOException {
|
||||
Path workDir = new Path(client.printWorkingDirectory());
|
||||
Path absolute = makeAbsolute(workDir, file);
|
||||
FileStatus fileStat = getFileStatus(client, absolute);
|
||||
if (!fileStat.isDir()) {
|
||||
return new FileStatus[] { fileStat };
|
||||
}
|
||||
FTPFile[] ftpFiles = client.listFiles(absolute.toUri().getPath());
|
||||
FileStatus[] fileStats = new FileStatus[ftpFiles.length];
|
||||
for (int i = 0; i < ftpFiles.length; i++) {
|
||||
fileStats[i] = getFileStatus(ftpFiles[i], absolute);
|
||||
}
|
||||
return fileStats;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileStatus getFileStatus(Path file) throws IOException {
|
||||
FTPClient client = connect();
|
||||
try {
|
||||
FileStatus status = getFileStatus(client, file);
|
||||
return status;
|
||||
} finally {
|
||||
disconnect(client);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private FileStatus getFileStatus(FTPClient client, Path file)
|
||||
throws IOException {
|
||||
FileStatus fileStat = null;
|
||||
Path workDir = new Path(client.printWorkingDirectory());
|
||||
Path absolute = makeAbsolute(workDir, file);
|
||||
Path parentPath = absolute.getParent();
|
||||
if (parentPath == null) { // root dir
|
||||
long length = -1; // Length of root dir on server not known
|
||||
boolean isDir = true;
|
||||
int blockReplication = 1;
|
||||
long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known.
|
||||
long modTime = -1; // Modification time of root dir not known.
|
||||
Path root = new Path("/");
|
||||
return new FileStatus(length, isDir, blockReplication, blockSize,
|
||||
modTime, root.makeQualified(this));
|
||||
}
|
||||
String pathName = parentPath.toUri().getPath();
|
||||
FTPFile[] ftpFiles = client.listFiles(pathName);
|
||||
if (ftpFiles != null) {
|
||||
for (FTPFile ftpFile : ftpFiles) {
|
||||
if (ftpFile.getName().equals(file.getName())) { // file found in dir
|
||||
fileStat = getFileStatus(ftpFile, parentPath);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fileStat == null) {
|
||||
throw new FileNotFoundException("File " + file + " does not exist.");
|
||||
}
|
||||
} else {
|
||||
throw new FileNotFoundException("File " + file + " does not exist.");
|
||||
}
|
||||
return fileStat;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the file information in FTPFile to a {@link FileStatus} object. *
|
||||
*
|
||||
* @param ftpFile
|
||||
* @param parentPath
|
||||
* @return FileStatus
|
||||
*/
|
||||
private FileStatus getFileStatus(FTPFile ftpFile, Path parentPath) {
|
||||
long length = ftpFile.getSize();
|
||||
boolean isDir = ftpFile.isDirectory();
|
||||
int blockReplication = 1;
|
||||
// Using default block size since there is no way in FTP client to know of
|
||||
// block sizes on server. The assumption could be less than ideal.
|
||||
long blockSize = DEFAULT_BLOCK_SIZE;
|
||||
long modTime = ftpFile.getTimestamp().getTimeInMillis();
|
||||
long accessTime = 0;
|
||||
FsPermission permission = getPermissions(ftpFile);
|
||||
String user = ftpFile.getUser();
|
||||
String group = ftpFile.getGroup();
|
||||
Path filePath = new Path(parentPath, ftpFile.getName());
|
||||
return new FileStatus(length, isDir, blockReplication, blockSize, modTime,
|
||||
accessTime, permission, user, group, filePath.makeQualified(this));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean mkdirs(Path file, FsPermission permission) throws IOException {
|
||||
FTPClient client = connect();
|
||||
try {
|
||||
boolean success = mkdirs(client, file, permission);
|
||||
return success;
|
||||
} finally {
|
||||
disconnect(client);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private boolean mkdirs(FTPClient client, Path file, FsPermission permission)
|
||||
throws IOException {
|
||||
boolean created = true;
|
||||
Path workDir = new Path(client.printWorkingDirectory());
|
||||
Path absolute = makeAbsolute(workDir, file);
|
||||
String pathName = absolute.getName();
|
||||
if (!exists(client, absolute)) {
|
||||
Path parent = absolute.getParent();
|
||||
created = (parent == null || mkdirs(client, parent, FsPermission
|
||||
.getDefault()));
|
||||
if (created) {
|
||||
String parentDir = parent.toUri().getPath();
|
||||
client.changeWorkingDirectory(parentDir);
|
||||
created = created & client.makeDirectory(pathName);
|
||||
}
|
||||
} else if (isFile(client, absolute)) {
|
||||
throw new IOException(String.format(
|
||||
"Can't make directory for path %s since it is a file.", absolute));
|
||||
}
|
||||
return created;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*/
|
||||
private boolean isFile(FTPClient client, Path file) {
|
||||
try {
|
||||
return !getFileStatus(client, file).isDir();
|
||||
} catch (FileNotFoundException e) {
|
||||
return false; // file does not exist
|
||||
} catch (IOException ioe) {
|
||||
throw new FTPException("File check failed", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Assuming that parent of both source and destination is the same. Is the
|
||||
* assumption correct or it is suppose to work like 'move' ?
|
||||
*/
|
||||
@Override
|
||||
public boolean rename(Path src, Path dst) throws IOException {
|
||||
FTPClient client = connect();
|
||||
try {
|
||||
boolean success = rename(client, src, dst);
|
||||
return success;
|
||||
} finally {
|
||||
disconnect(client);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method, so that we don't open a new connection when using this
|
||||
* method from within another method. Otherwise every API invocation incurs
|
||||
* the overhead of opening/closing a TCP connection.
|
||||
*
|
||||
* @param client
|
||||
* @param src
|
||||
* @param dst
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private boolean rename(FTPClient client, Path src, Path dst)
|
||||
throws IOException {
|
||||
Path workDir = new Path(client.printWorkingDirectory());
|
||||
Path absoluteSrc = makeAbsolute(workDir, src);
|
||||
Path absoluteDst = makeAbsolute(workDir, dst);
|
||||
if (!exists(client, absoluteSrc)) {
|
||||
throw new IOException("Source path " + src + " does not exist");
|
||||
}
|
||||
if (exists(client, absoluteDst)) {
|
||||
throw new IOException("Destination path " + dst
|
||||
+ " already exist, cannot rename!");
|
||||
}
|
||||
String parentSrc = absoluteSrc.getParent().toUri().toString();
|
||||
String parentDst = absoluteDst.getParent().toUri().toString();
|
||||
String from = src.getName();
|
||||
String to = dst.getName();
|
||||
if (!parentSrc.equals(parentDst)) {
|
||||
throw new IOException("Cannot rename parent(source): " + parentSrc
|
||||
+ ", parent(destination): " + parentDst);
|
||||
}
|
||||
client.changeWorkingDirectory(parentSrc);
|
||||
boolean renamed = client.rename(from, to);
|
||||
return renamed;
|
||||
}
|
||||
|
||||
/**
 * @return the server's login directory; this file system keeps no working
 *         directory state, so the home directory is always returned.
 */
@Override
public Path getWorkingDirectory() {
  // Return home directory always since we do not maintain state.
  return getHomeDirectory();
}
|
||||
|
||||
/**
 * Connects to the server and reports its current (login) directory as the
 * home directory.
 *
 * NOTE(review): if connect() itself throws, {@code client} remains null and
 * disconnect(null) is invoked in the finally block — confirm disconnect
 * tolerates a null argument. Also, an FTPException thrown from the finally
 * block would mask the original failure.
 *
 * @return the server's working directory immediately after login
 */
@Override
public Path getHomeDirectory() {
  FTPClient client = null;
  try {
    client = connect();
    Path homeDir = new Path(client.printWorkingDirectory());
    return homeDir;
  } catch (IOException ioe) {
    throw new FTPException("Failed to get home directory", ioe);
  } finally {
    try {
      disconnect(client);
    } catch (IOException ioe) {
      throw new FTPException("Failed to disconnect", ioe);
    }
  }
}
|
||||
|
||||
/**
 * No-op: this file system is stateless between calls, so the working
 * directory cannot be changed (see {@code getWorkingDirectory}).
 *
 * @param newDir ignored
 */
@Override
public void setWorkingDirectory(Path newDir) {
  // we do not maintain the working directory state
}
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.ftp;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.net.ftp.FTPClient;
|
||||
import org.apache.hadoop.fs.FSInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
public class FTPInputStream extends FSInputStream {
|
||||
|
||||
InputStream wrappedStream;
|
||||
FTPClient client;
|
||||
FileSystem.Statistics stats;
|
||||
boolean closed;
|
||||
long pos;
|
||||
|
||||
public FTPInputStream(InputStream stream, FTPClient client,
|
||||
FileSystem.Statistics stats) {
|
||||
if (stream == null) {
|
||||
throw new IllegalArgumentException("Null InputStream");
|
||||
}
|
||||
if (client == null || !client.isConnected()) {
|
||||
throw new IllegalArgumentException("FTP client null or not connected");
|
||||
}
|
||||
this.wrappedStream = stream;
|
||||
this.client = client;
|
||||
this.stats = stats;
|
||||
this.pos = 0;
|
||||
this.closed = false;
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
return pos;
|
||||
}
|
||||
|
||||
// We don't support seek.
|
||||
public void seek(long pos) throws IOException {
|
||||
throw new IOException("Seek not supported");
|
||||
}
|
||||
|
||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
||||
throw new IOException("Seek not supported");
|
||||
}
|
||||
|
||||
public synchronized int read() throws IOException {
|
||||
if (closed) {
|
||||
throw new IOException("Stream closed");
|
||||
}
|
||||
|
||||
int byteRead = wrappedStream.read();
|
||||
if (byteRead >= 0) {
|
||||
pos++;
|
||||
}
|
||||
if (stats != null & byteRead >= 0) {
|
||||
stats.incrementBytesRead(1);
|
||||
}
|
||||
return byteRead;
|
||||
}
|
||||
|
||||
public synchronized int read(byte buf[], int off, int len) throws IOException {
|
||||
if (closed) {
|
||||
throw new IOException("Stream closed");
|
||||
}
|
||||
|
||||
int result = wrappedStream.read(buf, off, len);
|
||||
if (result > 0) {
|
||||
pos += result;
|
||||
}
|
||||
if (stats != null & result > 0) {
|
||||
stats.incrementBytesRead(result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public synchronized void close() throws IOException {
|
||||
if (closed) {
|
||||
throw new IOException("Stream closed");
|
||||
}
|
||||
super.close();
|
||||
closed = true;
|
||||
if (!client.isConnected()) {
|
||||
throw new FTPException("Client not connected");
|
||||
}
|
||||
|
||||
boolean cmdCompleted = client.completePendingCommand();
|
||||
client.logout();
|
||||
client.disconnect();
|
||||
if (!cmdCompleted) {
|
||||
throw new FTPException("Could not complete transfer, Reply Code - "
|
||||
+ client.getReplyCode());
|
||||
}
|
||||
}
|
||||
|
||||
// Not supported.
|
||||
|
||||
public boolean markSupported() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void mark(int readLimit) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
public void reset() throws IOException {
|
||||
throw new IOException("Mark not supported");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* @author: Sriram Rao (Kosmix Corp.)
|
||||
*
|
||||
* We need to provide the ability to the code in fs/kfs without really
|
||||
* having a KFS deployment. In particular, the glue code that wraps
|
||||
* around calls to KfsAccess object. This is accomplished by defining a
|
||||
* filesystem implementation interface:
|
||||
* -- for testing purposes, a dummy implementation of this interface
|
||||
* will suffice; as long as the dummy implementation is close enough
|
||||
* to doing what KFS does, we are good.
|
||||
* -- for deployment purposes with KFS, this interface is
|
||||
* implemented by the KfsImpl object.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.kfs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
 * Abstraction over the KFS client glue so the fs/kfs code can run against
 * either a real KfsAccess-backed implementation or a test double.
 * Integer-returning operations follow the KFS convention visible in KFSImpl:
 * the value is the native call's result code.
 */
interface IFSImpl {
  /** @return true iff a file or directory exists at {@code path}. */
  public boolean exists(String path) throws IOException;
  /** @return true iff {@code path} names a directory. */
  public boolean isDirectory(String path) throws IOException;
  /** @return true iff {@code path} names a plain file. */
  public boolean isFile(String path) throws IOException;
  /** @return child entry names of directory {@code path}. */
  public String[] readdir(String path) throws IOException;
  /** @return child statuses of directory {@code path}. */
  public FileStatus[] readdirplus(Path path) throws IOException;

  /** Creates {@code path} and missing ancestors; returns a result code. */
  public int mkdirs(String path) throws IOException;
  /** Renames {@code source} to {@code dest}; returns a result code. */
  public int rename(String source, String dest) throws IOException;

  /** Removes the directory {@code path}; returns a result code. */
  public int rmdir(String path) throws IOException;
  /** Removes the file {@code path}; returns a result code. */
  public int remove(String path) throws IOException;
  /** @return size in bytes of the file at {@code path}. */
  public long filesize(String path) throws IOException;
  /** @return replication factor of the file at {@code path}. */
  public short getReplication(String path) throws IOException;
  /** Sets the replication factor; returns the resulting value. */
  public short setReplication(String path, short replication) throws IOException;
  /** @return host locations for each block of [start, start+len). */
  public String[][] getDataLocation(String path, long start, long len) throws IOException;

  /** @return modification time (millis) of {@code path}. */
  public long getModificationTime(String path) throws IOException;
  /** Creates a new file for writing with the given replication. */
  public FSDataOutputStream create(String path, short replication, int bufferSize, Progressable progress) throws IOException;
  /** Opens an existing file for reading. */
  public FSDataInputStream open(String path, int bufferSize) throws IOException;
  /** Opens an existing file for appending; replication is unchanged. */
  public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException;

};
|
|
@ -0,0 +1,151 @@
|
|||
/**
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* @author: Sriram Rao (Kosmix Corp.)
|
||||
*
|
||||
* Provide the implementation of KFS which turn into calls to KfsAccess.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.kfs;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import org.kosmix.kosmosfs.access.KfsAccess;
|
||||
import org.kosmix.kosmosfs.access.KfsFileAttr;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
class KFSImpl implements IFSImpl {
|
||||
private KfsAccess kfsAccess = null;
|
||||
private FileSystem.Statistics statistics;
|
||||
|
||||
@Deprecated
|
||||
public KFSImpl(String metaServerHost, int metaServerPort
|
||||
) throws IOException {
|
||||
this(metaServerHost, metaServerPort, null);
|
||||
}
|
||||
|
||||
public KFSImpl(String metaServerHost, int metaServerPort,
|
||||
FileSystem.Statistics stats) throws IOException {
|
||||
kfsAccess = new KfsAccess(metaServerHost, metaServerPort);
|
||||
statistics = stats;
|
||||
}
|
||||
|
||||
public boolean exists(String path) throws IOException {
|
||||
return kfsAccess.kfs_exists(path);
|
||||
}
|
||||
|
||||
public boolean isDirectory(String path) throws IOException {
|
||||
return kfsAccess.kfs_isDirectory(path);
|
||||
}
|
||||
|
||||
public boolean isFile(String path) throws IOException {
|
||||
return kfsAccess.kfs_isFile(path);
|
||||
}
|
||||
|
||||
public String[] readdir(String path) throws IOException {
|
||||
return kfsAccess.kfs_readdir(path);
|
||||
}
|
||||
|
||||
public FileStatus[] readdirplus(Path path) throws IOException {
|
||||
String srep = path.toUri().getPath();
|
||||
KfsFileAttr[] fattr = kfsAccess.kfs_readdirplus(srep);
|
||||
if (fattr == null)
|
||||
return null;
|
||||
int numEntries = 0;
|
||||
for (int i = 0; i < fattr.length; i++) {
|
||||
if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0))
|
||||
continue;
|
||||
numEntries++;
|
||||
}
|
||||
FileStatus[] fstatus = new FileStatus[numEntries];
|
||||
int j = 0;
|
||||
for (int i = 0; i < fattr.length; i++) {
|
||||
if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0))
|
||||
continue;
|
||||
Path fn = new Path(path, fattr[i].filename);
|
||||
|
||||
if (fattr[i].isDirectory)
|
||||
fstatus[j] = new FileStatus(0, true, 1, 0, fattr[i].modificationTime, fn);
|
||||
else
|
||||
fstatus[j] = new FileStatus(fattr[i].filesize, fattr[i].isDirectory,
|
||||
fattr[i].replication,
|
||||
(long)
|
||||
(1 << 26),
|
||||
fattr[i].modificationTime,
|
||||
fn);
|
||||
|
||||
j++;
|
||||
}
|
||||
return fstatus;
|
||||
}
|
||||
|
||||
|
||||
public int mkdirs(String path) throws IOException {
|
||||
return kfsAccess.kfs_mkdirs(path);
|
||||
}
|
||||
|
||||
public int rename(String source, String dest) throws IOException {
|
||||
return kfsAccess.kfs_rename(source, dest);
|
||||
}
|
||||
|
||||
public int rmdir(String path) throws IOException {
|
||||
return kfsAccess.kfs_rmdir(path);
|
||||
}
|
||||
|
||||
public int remove(String path) throws IOException {
|
||||
return kfsAccess.kfs_remove(path);
|
||||
}
|
||||
|
||||
public long filesize(String path) throws IOException {
|
||||
return kfsAccess.kfs_filesize(path);
|
||||
}
|
||||
|
||||
public short getReplication(String path) throws IOException {
|
||||
return kfsAccess.kfs_getReplication(path);
|
||||
}
|
||||
|
||||
public short setReplication(String path, short replication) throws IOException {
|
||||
return kfsAccess.kfs_setReplication(path, replication);
|
||||
}
|
||||
|
||||
public String[][] getDataLocation(String path, long start, long len) throws IOException {
|
||||
return kfsAccess.kfs_getDataLocation(path, start, len);
|
||||
}
|
||||
|
||||
public long getModificationTime(String path) throws IOException {
|
||||
return kfsAccess.kfs_getModificationTime(path);
|
||||
}
|
||||
|
||||
public FSDataInputStream open(String path, int bufferSize) throws IOException {
|
||||
return new FSDataInputStream(new KFSInputStream(kfsAccess, path,
|
||||
statistics));
|
||||
}
|
||||
|
||||
public FSDataOutputStream create(String path, short replication, int bufferSize, Progressable progress) throws IOException {
|
||||
return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, replication, false, progress),
|
||||
statistics);
|
||||
}
|
||||
|
||||
public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException {
|
||||
// when opening for append, # of replicas is ignored
|
||||
return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, (short) 1, true, progress),
|
||||
statistics);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/**
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* @author: Sriram Rao (Kosmix Corp.)
|
||||
*
|
||||
* Implements the Hadoop FSInputStream interfaces to allow applications to read
|
||||
* files in Kosmos File System (KFS).
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.kfs;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSInputStream;
|
||||
|
||||
import org.kosmix.kosmosfs.access.KfsAccess;
|
||||
import org.kosmix.kosmosfs.access.KfsInputChannel;
|
||||
|
||||
class KFSInputStream extends FSInputStream {
|
||||
|
||||
private KfsInputChannel kfsChannel;
|
||||
private FileSystem.Statistics statistics;
|
||||
private long fsize;
|
||||
|
||||
@Deprecated
|
||||
public KFSInputStream(KfsAccess kfsAccess, String path) {
|
||||
this(kfsAccess, path, null);
|
||||
}
|
||||
|
||||
public KFSInputStream(KfsAccess kfsAccess, String path,
|
||||
FileSystem.Statistics stats) {
|
||||
this.statistics = stats;
|
||||
this.kfsChannel = kfsAccess.kfs_open(path);
|
||||
if (this.kfsChannel != null)
|
||||
this.fsize = kfsAccess.kfs_filesize(path);
|
||||
else
|
||||
this.fsize = 0;
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
return kfsChannel.tell();
|
||||
}
|
||||
|
||||
public synchronized int available() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
return (int) (this.fsize - getPos());
|
||||
}
|
||||
|
||||
public synchronized void seek(long targetPos) throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
kfsChannel.seek(targetPos);
|
||||
}
|
||||
|
||||
public synchronized boolean seekToNewSource(long targetPos) throws IOException {
|
||||
return false;
|
||||
}
|
||||
|
||||
public synchronized int read() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
byte b[] = new byte[1];
|
||||
int res = read(b, 0, 1);
|
||||
if (res == 1) {
|
||||
if (statistics != null) {
|
||||
statistics.incrementBytesRead(1);
|
||||
}
|
||||
return b[0] & 0xff;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public synchronized int read(byte b[], int off, int len) throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
int res;
|
||||
|
||||
res = kfsChannel.read(ByteBuffer.wrap(b, off, len));
|
||||
// Use -1 to signify EOF
|
||||
if (res == 0)
|
||||
return -1;
|
||||
if (statistics != null) {
|
||||
statistics.incrementBytesRead(res);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public synchronized void close() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
kfsChannel.close();
|
||||
kfsChannel = null;
|
||||
}
|
||||
|
||||
public boolean markSupported() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void mark(int readLimit) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
public void reset() throws IOException {
|
||||
throw new IOException("Mark not supported");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* @author: Sriram Rao (Kosmix Corp.)
|
||||
*
|
||||
* Implements the Hadoop FSOutputStream interfaces to allow applications to write to
|
||||
* files in Kosmos File System (KFS).
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.kfs;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.*;
|
||||
import java.util.*;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
import org.kosmix.kosmosfs.access.KfsAccess;
|
||||
import org.kosmix.kosmosfs.access.KfsOutputChannel;
|
||||
|
||||
class KFSOutputStream extends OutputStream {
|
||||
|
||||
private String path;
|
||||
private KfsOutputChannel kfsChannel;
|
||||
private Progressable progressReporter;
|
||||
|
||||
public KFSOutputStream(KfsAccess kfsAccess, String path, short replication,
|
||||
boolean append, Progressable prog) {
|
||||
this.path = path;
|
||||
|
||||
if ((append) && (kfsAccess.kfs_isFile(path)))
|
||||
this.kfsChannel = kfsAccess.kfs_append(path);
|
||||
else
|
||||
this.kfsChannel = kfsAccess.kfs_create(path, replication);
|
||||
this.progressReporter = prog;
|
||||
}
|
||||
|
||||
public long getPos() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
return kfsChannel.tell();
|
||||
}
|
||||
|
||||
public void write(int v) throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
byte[] b = new byte[1];
|
||||
|
||||
b[0] = (byte) v;
|
||||
write(b, 0, 1);
|
||||
}
|
||||
|
||||
public void write(byte b[], int off, int len) throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
|
||||
// touch the progress before going into KFS since the call can block
|
||||
progressReporter.progress();
|
||||
kfsChannel.write(ByteBuffer.wrap(b, off, len));
|
||||
}
|
||||
|
||||
public void flush() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
throw new IOException("File closed");
|
||||
}
|
||||
// touch the progress before going into KFS since the call can block
|
||||
progressReporter.progress();
|
||||
kfsChannel.sync();
|
||||
}
|
||||
|
||||
public synchronized void close() throws IOException {
|
||||
if (kfsChannel == null) {
|
||||
return;
|
||||
}
|
||||
flush();
|
||||
kfsChannel.close();
|
||||
kfsChannel = null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,340 @@
|
|||
/**
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* @author: Sriram Rao (Kosmix Corp.)
|
||||
*
|
||||
* Implements the Hadoop FS interfaces to allow applications to store
|
||||
*files in Kosmos File System (KFS).
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.kfs;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.BlockLocation;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* A FileSystem backed by KFS.
|
||||
*
|
||||
*/
|
||||
|
||||
public class KosmosFileSystem extends FileSystem {
|
||||
|
||||
private FileSystem localFs;
|
||||
private IFSImpl kfsImpl = null;
|
||||
private URI uri;
|
||||
private Path workingDir = new Path("/");
|
||||
|
||||
public KosmosFileSystem() {
|
||||
|
||||
}
|
||||
|
||||
KosmosFileSystem(IFSImpl fsimpl) {
|
||||
this.kfsImpl = fsimpl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public URI getUri() {
|
||||
return uri;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(URI uri, Configuration conf) throws IOException {
|
||||
super.initialize(uri, conf);
|
||||
try {
|
||||
if (kfsImpl == null) {
|
||||
if (uri.getHost() == null) {
|
||||
kfsImpl = new KFSImpl(conf.get("fs.kfs.metaServerHost", ""),
|
||||
conf.getInt("fs.kfs.metaServerPort", -1),
|
||||
statistics);
|
||||
} else {
|
||||
kfsImpl = new KFSImpl(uri.getHost(), uri.getPort(), statistics);
|
||||
}
|
||||
}
|
||||
|
||||
this.localFs = FileSystem.getLocal(conf);
|
||||
this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
|
||||
this.workingDir = new Path("/user", System.getProperty("user.name")
|
||||
).makeQualified(this);
|
||||
setConf(conf);
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
System.out.println("Unable to initialize KFS");
|
||||
System.exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getWorkingDirectory() {
|
||||
return workingDir;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setWorkingDirectory(Path dir) {
|
||||
workingDir = makeAbsolute(dir);
|
||||
}
|
||||
|
||||
private Path makeAbsolute(Path path) {
|
||||
if (path.isAbsolute()) {
|
||||
return path;
|
||||
}
|
||||
return new Path(workingDir, path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean mkdirs(Path path, FsPermission permission
|
||||
) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
int res;
|
||||
|
||||
// System.out.println("Calling mkdirs on: " + srep);
|
||||
|
||||
res = kfsImpl.mkdirs(srep);
|
||||
|
||||
return res == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Deprecated
|
||||
public boolean isDirectory(Path path) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
// System.out.println("Calling isdir on: " + srep);
|
||||
|
||||
return kfsImpl.isDirectory(srep);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Deprecated
|
||||
public boolean isFile(Path path) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
return kfsImpl.isFile(srep);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileStatus[] listStatus(Path path) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
if (kfsImpl.isFile(srep))
|
||||
return new FileStatus[] { getFileStatus(path) } ;
|
||||
|
||||
return kfsImpl.readdirplus(absolute);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileStatus getFileStatus(Path path) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
if (!kfsImpl.exists(srep)) {
|
||||
throw new FileNotFoundException("File " + path + " does not exist.");
|
||||
}
|
||||
if (kfsImpl.isDirectory(srep)) {
|
||||
// System.out.println("Status of path: " + path + " is dir");
|
||||
return new FileStatus(0, true, 1, 0, kfsImpl.getModificationTime(srep),
|
||||
path.makeQualified(this));
|
||||
} else {
|
||||
// System.out.println("Status of path: " + path + " is file");
|
||||
return new FileStatus(kfsImpl.filesize(srep), false,
|
||||
kfsImpl.getReplication(srep),
|
||||
getDefaultBlockSize(),
|
||||
kfsImpl.getModificationTime(srep),
|
||||
path.makeQualified(this));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream append(Path f, int bufferSize,
|
||||
Progressable progress) throws IOException {
|
||||
Path parent = f.getParent();
|
||||
if (parent != null && !mkdirs(parent)) {
|
||||
throw new IOException("Mkdirs failed to create " + parent);
|
||||
}
|
||||
|
||||
Path absolute = makeAbsolute(f);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
return kfsImpl.append(srep, bufferSize, progress);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataOutputStream create(Path file, FsPermission permission,
|
||||
boolean overwrite, int bufferSize,
|
||||
short replication, long blockSize, Progressable progress)
|
||||
throws IOException {
|
||||
|
||||
if (exists(file)) {
|
||||
if (overwrite) {
|
||||
delete(file, true);
|
||||
} else {
|
||||
throw new IOException("File already exists: " + file);
|
||||
}
|
||||
}
|
||||
|
||||
Path parent = file.getParent();
|
||||
if (parent != null && !mkdirs(parent)) {
|
||||
throw new IOException("Mkdirs failed to create " + parent);
|
||||
}
|
||||
|
||||
Path absolute = makeAbsolute(file);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
return kfsImpl.create(srep, replication, bufferSize, progress);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
|
||||
if (!exists(path))
|
||||
throw new IOException("File does not exist: " + path);
|
||||
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
return kfsImpl.open(srep, bufferSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean rename(Path src, Path dst) throws IOException {
|
||||
Path absoluteS = makeAbsolute(src);
|
||||
String srepS = absoluteS.toUri().getPath();
|
||||
Path absoluteD = makeAbsolute(dst);
|
||||
String srepD = absoluteD.toUri().getPath();
|
||||
|
||||
// System.out.println("Calling rename on: " + srepS + " -> " + srepD);
|
||||
|
||||
return kfsImpl.rename(srepS, srepD) == 0;
|
||||
}
|
||||
|
||||
// recursively delete the directory and its contents
|
||||
@Override
|
||||
public boolean delete(Path path, boolean recursive) throws IOException {
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
if (kfsImpl.isFile(srep))
|
||||
return kfsImpl.remove(srep) == 0;
|
||||
|
||||
FileStatus[] dirEntries = listStatus(absolute);
|
||||
if ((!recursive) && (dirEntries != null) &&
|
||||
(dirEntries.length != 0)) {
|
||||
throw new IOException("Directory " + path.toString() +
|
||||
" is not empty.");
|
||||
}
|
||||
if (dirEntries != null) {
|
||||
for (int i = 0; i < dirEntries.length; i++) {
|
||||
delete(new Path(absolute, dirEntries[i].getPath()), recursive);
|
||||
}
|
||||
}
|
||||
return kfsImpl.rmdir(srep) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getDefaultReplication() {
|
||||
return 3;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean setReplication(Path path, short replication)
|
||||
throws IOException {
|
||||
|
||||
Path absolute = makeAbsolute(path);
|
||||
String srep = absolute.toUri().getPath();
|
||||
|
||||
int res = kfsImpl.setReplication(srep, replication);
|
||||
return res >= 0;
|
||||
}
|
||||
|
||||
// 64MB is the KFS block size
|
||||
|
||||
@Override
|
||||
public long getDefaultBlockSize() {
|
||||
return 1 << 26;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void lock(Path path, boolean shared) throws IOException {
|
||||
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void release(Path path) throws IOException {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Return null if the file doesn't exist; otherwise, get the
|
||||
* locations of the various chunks of the file file from KFS.
|
||||
*/
|
||||
@Override
|
||||
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
|
||||
long len) throws IOException {
|
||||
|
||||
if (file == null) {
|
||||
return null;
|
||||
}
|
||||
String srep = makeAbsolute(file.getPath()).toUri().getPath();
|
||||
String[][] hints = kfsImpl.getDataLocation(srep, start, len);
|
||||
if (hints == null) {
|
||||
return null;
|
||||
}
|
||||
BlockLocation[] result = new BlockLocation[hints.length];
|
||||
long blockSize = getDefaultBlockSize();
|
||||
long length = len;
|
||||
long blockStart = start;
|
||||
for(int i=0; i < result.length; ++i) {
|
||||
result[i] = new BlockLocation(null, hints[i], blockStart,
|
||||
length < blockSize ? length : blockSize);
|
||||
blockStart += blockSize;
|
||||
length -= blockSize;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
|
||||
FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
|
||||
FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
|
||||
throws IOException {
|
||||
return tmpLocalFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
|
||||
throws IOException {
|
||||
moveFromLocalFile(tmpLocalFile, fsOutputFile);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
<html>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<head></head>
|
||||
<body>
|
||||
<h1>A client for the Kosmos filesystem (KFS)</h1>
|
||||
|
||||
<h3>Introduction</h3>
|
||||
|
||||
This page describes how to use the Kosmos Filesystem
|
||||
(<a href="http://kosmosfs.sourceforge.net"> KFS </a>) as a backing
|
||||
store with Hadoop. This page assumes that you have downloaded the
|
||||
KFS software and installed necessary binaries as outlined in the KFS
|
||||
documentation.
|
||||
|
||||
<h3>Steps</h3>
|
||||
|
||||
<ul>
|
||||
<li>In the Hadoop conf directory edit core-site.xml,
|
||||
add the following:
|
||||
<pre>
|
||||
<property>
|
||||
<name>fs.kfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
|
||||
<description>The FileSystem for kfs: uris.</description>
|
||||
</property>
|
||||
</pre>
|
||||
|
||||
<li>In the Hadoop conf directory edit core-site.xml,
|
||||
adding the following (with appropriate values for
|
||||
<server> and <port>):
|
||||
<pre>
|
||||
<property>
|
||||
<name>fs.default.name</name>
|
||||
<value>kfs://<server:port></value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.kfs.metaServerHost</name>
|
||||
<value><server></value>
|
||||
<description>The location of the KFS meta server.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.kfs.metaServerPort</name>
|
||||
<value><port></value>
|
||||
<description>The location of the meta server's port.</description>
|
||||
</property>
|
||||
|
||||
</pre>
|
||||
</li>
|
||||
|
||||
<li>Copy KFS's <i> kfs-0.1.jar </i> to Hadoop's lib directory. This step
|
||||
enables Hadoop to load the KFS-specific modules. Note
|
||||
that, kfs-0.1.jar was built when you compiled KFS source
|
||||
code. This jar file contains code that calls KFS's client
|
||||
library code via JNI; the native code is in KFS's <i>
|
||||
libkfsClient.so </i> library.
|
||||
</li>
|
||||
|
||||
<li> When the Hadoop map/reduce trackers start up, those
|
||||
processes (on local as well as remote nodes) will now need to load
|
||||
KFS's <i> libkfsClient.so </i> library. To simplify this process, it is advisable to
|
||||
store libkfsClient.so in an NFS accessible directory (similar to where
|
||||
Hadoop binaries/scripts are stored); then, modify Hadoop's
|
||||
conf/hadoop-env.sh adding the following line and providing suitable
|
||||
value for <path>:
|
||||
<pre>
|
||||
export LD_LIBRARY_PATH=<path>
|
||||
</pre>
|
||||
|
||||
|
||||
<li>Start only the map/reduce trackers
|
||||
<br />
|
||||
example: execute Hadoop's bin/start-mapred.sh</li>
|
||||
</ul>
|
||||
<br/>
|
||||
|
||||
If the map/reduce job trackers start up, all file-I/O is done to KFS.
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,23 @@
|
|||
<html>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<body>
|
||||
An abstract file system API.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.permission;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Signals that a caller lacks the permission required for a file-system
 * operation.
 *
 * @deprecated Use {@link org.apache.hadoop.security.AccessControlException}
 *             instead.
 */
@Deprecated
public class AccessControlException extends IOException {
  // Serialization version, required by java.io.Serializable.
  private static final long serialVersionUID = 1L;

  /**
   * No-arg constructor, needed so the exception can be reconstructed when
   * unwrapped from {@link org.apache.hadoop.ipc.RemoteException}.
   */
  public AccessControlException() {
    super("Permission denied.");
  }

  /**
   * Create the exception with the given detail message.
   *
   * @param s the detail message.
   */
  public AccessControlException(String s) {
    super(s);
  }

  /**
   * Create the exception from an underlying cause; the detail message
   * becomes <tt>(cause==null ? null : cause.toString())</tt>.
   *
   * @param cause the cause, retrievable later via {@link #getCause()}.
   *              A <tt>null</tt> value means the cause is nonexistent
   *              or unknown.
   */
  public AccessControlException(Throwable cause) {
    super(cause);
  }
}
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.permission;
|
||||
|
||||
/**
 * File system actions, e.g. read, write, etc.
 */
public enum FsAction {
  // Declared in POSIX bit order: each constant's ordinal is its rwx bit
  // pattern (NONE == 000b, ..., ALL == 111b), which the bitwise operations
  // below rely on.
  NONE("---"),
  EXECUTE("--x"),
  WRITE("-w-"),
  WRITE_EXECUTE("-wx"),
  READ("r--"),
  READ_EXECUTE("r-x"),
  READ_WRITE("rw-"),
  ALL("rwx");

  /** Cached values() array, indexed by rwx bit pattern. */
  private final static FsAction[] BY_BITS = values();

  /** Symbolic representation */
  public final String SYMBOL;

  private FsAction(String s) {
    SYMBOL = s;
  }

  /**
   * Return true if this action implies that action.
   * @param that the action to test against; null implies nothing.
   */
  public boolean implies(FsAction that) {
    // Implied exactly when every bit of `that` is also set here.
    return that != null
        && (ordinal() & that.ordinal()) == that.ordinal();
  }

  /** AND operation. */
  public FsAction and(FsAction that) {
    return BY_BITS[ordinal() & that.ordinal()];
  }

  /** OR operation. */
  public FsAction or(FsAction that) {
    return BY_BITS[ordinal() | that.ordinal()];
  }

  /** NOT operation. */
  public FsAction not() {
    // Flip all three rwx bits.
    return BY_BITS[ordinal() ^ 7];
  }
}
|
|
@ -0,0 +1,232 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.permission;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.*;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A class for file/directory permissions.
|
||||
*/
|
||||
public class FsPermission implements Writable {
|
||||
static final WritableFactory FACTORY = new WritableFactory() {
|
||||
public Writable newInstance() { return new FsPermission(); }
|
||||
};
|
||||
static { // register a ctor
|
||||
WritableFactories.setFactory(FsPermission.class, FACTORY);
|
||||
}
|
||||
|
||||
/** Create an immutable {@link FsPermission} object. */
|
||||
public static FsPermission createImmutable(short permission) {
|
||||
return new FsPermission(permission) {
|
||||
public FsPermission applyUMask(FsPermission umask) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
//POSIX permission style
|
||||
private FsAction useraction = null;
|
||||
private FsAction groupaction = null;
|
||||
private FsAction otheraction = null;
|
||||
private boolean stickyBit = false;
|
||||
|
||||
private FsPermission() {}
|
||||
|
||||
/**
|
||||
* Construct by the given {@link FsAction}.
|
||||
* @param u user action
|
||||
* @param g group action
|
||||
* @param o other action
|
||||
*/
|
||||
public FsPermission(FsAction u, FsAction g, FsAction o) {
|
||||
this(u, g, o, false);
|
||||
}
|
||||
|
||||
public FsPermission(FsAction u, FsAction g, FsAction o, boolean sb) {
|
||||
set(u, g, o, sb);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct by the given mode.
|
||||
* @param mode
|
||||
* @see #toShort()
|
||||
*/
|
||||
public FsPermission(short mode) { fromShort(mode); }
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
*
|
||||
* @param other other permission
|
||||
*/
|
||||
public FsPermission(FsPermission other) {
|
||||
this.useraction = other.useraction;
|
||||
this.groupaction = other.groupaction;
|
||||
this.otheraction = other.otheraction;
|
||||
}
|
||||
|
||||
/** Return user {@link FsAction}. */
|
||||
public FsAction getUserAction() {return useraction;}
|
||||
|
||||
/** Return group {@link FsAction}. */
|
||||
public FsAction getGroupAction() {return groupaction;}
|
||||
|
||||
/** Return other {@link FsAction}. */
|
||||
public FsAction getOtherAction() {return otheraction;}
|
||||
|
||||
private void set(FsAction u, FsAction g, FsAction o, boolean sb) {
|
||||
useraction = u;
|
||||
groupaction = g;
|
||||
otheraction = o;
|
||||
stickyBit = sb;
|
||||
}
|
||||
|
||||
public void fromShort(short n) {
|
||||
FsAction[] v = FsAction.values();
|
||||
|
||||
set(v[(n >>> 6) & 7], v[(n >>> 3) & 7], v[n & 7], (((n >>> 9) & 1) == 1) );
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeShort(toShort());
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
fromShort(in.readShort());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and initialize a {@link FsPermission} from {@link DataInput}.
|
||||
*/
|
||||
public static FsPermission read(DataInput in) throws IOException {
|
||||
FsPermission p = new FsPermission();
|
||||
p.readFields(in);
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode the object to a short.
|
||||
*/
|
||||
public short toShort() {
|
||||
int s = (stickyBit ? 1 << 9 : 0) |
|
||||
(useraction.ordinal() << 6) |
|
||||
(groupaction.ordinal() << 3) |
|
||||
otheraction.ordinal();
|
||||
|
||||
return (short)s;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public boolean equals(Object obj) {
|
||||
if (obj instanceof FsPermission) {
|
||||
FsPermission that = (FsPermission)obj;
|
||||
return this.useraction == that.useraction
|
||||
&& this.groupaction == that.groupaction
|
||||
&& this.otheraction == that.otheraction
|
||||
&& this.stickyBit == that.stickyBit;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int hashCode() {return toShort();}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String toString() {
|
||||
String str = useraction.SYMBOL + groupaction.SYMBOL + otheraction.SYMBOL;
|
||||
if(stickyBit) {
|
||||
StringBuilder str2 = new StringBuilder(str);
|
||||
str2.replace(str2.length() - 1, str2.length(),
|
||||
otheraction.implies(FsAction.EXECUTE) ? "t" : "T");
|
||||
str = str2.toString();
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/** Apply a umask to this permission and return a new one */
|
||||
public FsPermission applyUMask(FsPermission umask) {
|
||||
return new FsPermission(useraction.and(umask.useraction.not()),
|
||||
groupaction.and(umask.groupaction.not()),
|
||||
otheraction.and(umask.otheraction.not()));
|
||||
}
|
||||
|
||||
/** umask property label */
|
||||
public static final String UMASK_LABEL = "dfs.umask";
|
||||
public static final int DEFAULT_UMASK = 0022;
|
||||
|
||||
/** Get the user file creation mask (umask) */
|
||||
public static FsPermission getUMask(Configuration conf) {
|
||||
int umask = DEFAULT_UMASK;
|
||||
if (conf != null) {
|
||||
umask = conf.getInt(UMASK_LABEL, DEFAULT_UMASK);
|
||||
}
|
||||
return new FsPermission((short)umask);
|
||||
}
|
||||
|
||||
public boolean getStickyBit() {
|
||||
return stickyBit;
|
||||
}
|
||||
|
||||
/** Set the user file creation mask (umask) */
|
||||
public static void setUMask(Configuration conf, FsPermission umask) {
|
||||
conf.setInt(UMASK_LABEL, umask.toShort());
|
||||
}
|
||||
|
||||
/** Get the default permission. */
|
||||
public static FsPermission getDefault() {
|
||||
return new FsPermission((short)00777);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FsPermission from a Unix symbolic permission string
|
||||
* @param unixSymbolicPermission e.g. "-rw-rw-rw-"
|
||||
*/
|
||||
public static FsPermission valueOf(String unixSymbolicPermission) {
|
||||
if (unixSymbolicPermission == null) {
|
||||
return null;
|
||||
}
|
||||
else if (unixSymbolicPermission.length() != 10) {
|
||||
throw new IllegalArgumentException("length != 10(unixSymbolicPermission="
|
||||
+ unixSymbolicPermission + ")");
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
for(int i = 1; i < unixSymbolicPermission.length(); i++) {
|
||||
n = n << 1;
|
||||
char c = unixSymbolicPermission.charAt(i);
|
||||
n += (c == '-' || c == 'T' || c == 'S') ? 0: 1;
|
||||
}
|
||||
|
||||
// Add sticky bit value if set
|
||||
if(unixSymbolicPermission.charAt(9) == 't' ||
|
||||
unixSymbolicPermission.charAt(9) == 'T')
|
||||
n += 01000;
|
||||
|
||||
return new FsPermission((short)n);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.permission;
|
||||
|
||||
import org.apache.hadoop.io.*;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Store permission related information.
|
||||
*/
|
||||
public class PermissionStatus implements Writable {
|
||||
static final WritableFactory FACTORY = new WritableFactory() {
|
||||
public Writable newInstance() { return new PermissionStatus(); }
|
||||
};
|
||||
static { // register a ctor
|
||||
WritableFactories.setFactory(PermissionStatus.class, FACTORY);
|
||||
}
|
||||
|
||||
/** Create an immutable {@link PermissionStatus} object. */
|
||||
public static PermissionStatus createImmutable(
|
||||
String user, String group, FsPermission permission) {
|
||||
return new PermissionStatus(user, group, permission) {
|
||||
public PermissionStatus applyUMask(FsPermission umask) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private String username;
|
||||
private String groupname;
|
||||
private FsPermission permission;
|
||||
|
||||
private PermissionStatus() {}
|
||||
|
||||
/** Constructor */
|
||||
public PermissionStatus(String user, String group, FsPermission permission) {
|
||||
username = user;
|
||||
groupname = group;
|
||||
this.permission = permission;
|
||||
}
|
||||
|
||||
/** Return user name */
|
||||
public String getUserName() {return username;}
|
||||
|
||||
/** Return group name */
|
||||
public String getGroupName() {return groupname;}
|
||||
|
||||
/** Return permission */
|
||||
public FsPermission getPermission() {return permission;}
|
||||
|
||||
/**
|
||||
* Apply umask.
|
||||
* @see FsPermission#applyUMask(FsPermission)
|
||||
*/
|
||||
public PermissionStatus applyUMask(FsPermission umask) {
|
||||
permission = permission.applyUMask(umask);
|
||||
return this;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
username = Text.readString(in);
|
||||
groupname = Text.readString(in);
|
||||
permission = FsPermission.read(in);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
write(out, username, groupname, permission);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and initialize a {@link PermissionStatus} from {@link DataInput}.
|
||||
*/
|
||||
public static PermissionStatus read(DataInput in) throws IOException {
|
||||
PermissionStatus p = new PermissionStatus();
|
||||
p.readFields(in);
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize a {@link PermissionStatus} from its base components.
|
||||
*/
|
||||
public static void write(DataOutput out,
|
||||
String username,
|
||||
String groupname,
|
||||
FsPermission permission) throws IOException {
|
||||
Text.writeString(out, username);
|
||||
Text.writeString(out, groupname);
|
||||
permission.write(out);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String toString() {
|
||||
return username + ":" + groupname + ":" + permission;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
/**
 * Holds metadata about a block of data being stored in a {@link FileSystemStore}.
 * Instances are immutable value holders: an identifier plus a byte length.
 */
public class Block {

  /** Unique identifier of the block within the store. */
  private final long id;

  /** Length of the block's data, in bytes. */
  private final long length;

  /**
   * @param id unique identifier of the block
   * @param length length of the block's data in bytes
   */
  public Block(long id, long length) {
    this.id = id;
    this.length = length;
  }

  /** @return the block's unique identifier */
  public long getId() {
    return id;
  }

  /** @return the length of the block's data in bytes */
  public long getLength() {
    return length;
  }

  @Override
  public String toString() {
    return "Block[" + id + ", " + length + "]";
  }

}
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
 * A facility for storing and retrieving {@link INode}s and {@link Block}s.
 * Implementations persist filesystem metadata (inodes) and raw data (blocks)
 * in some backing store, e.g. Amazon S3.
 */
public interface FileSystemStore {

  /** Bind this store to the given filesystem URI using the given configuration. */
  void initialize(URI uri, Configuration conf) throws IOException;
  /** Return the version string of the stored data format. */
  String getVersion() throws IOException;

  /** Persist the metadata for {@code inode} under {@code path}. */
  void storeINode(Path path, INode inode) throws IOException;
  /** Persist the contents of local {@code file} as the data for {@code block}. */
  void storeBlock(Block block, File file) throws IOException;

  /** Return true if an inode is stored under {@code path}. */
  boolean inodeExists(Path path) throws IOException;
  /** Return true if a block with the given id is stored. */
  boolean blockExists(long blockId) throws IOException;

  /** Return the inode stored under {@code path}, or null if absent. */
  INode retrieveINode(Path path) throws IOException;
  /**
   * Fetch a block's data, starting at {@code byteRangeStart} bytes into it,
   * into a local temporary file and return that file.
   */
  File retrieveBlock(Block block, long byteRangeStart) throws IOException;

  /** Remove the inode stored under {@code path}. */
  void deleteINode(Path path) throws IOException;
  /** Remove the stored data for {@code block}. */
  void deleteBlock(Block block) throws IOException;

  /** List the immediate children of {@code path}. */
  Set<Path> listSubPaths(Path path) throws IOException;
  /** List all descendants of {@code path}, recursively. */
  Set<Path> listDeepSubPaths(Path path) throws IOException;

  /**
   * Delete everything. Used for testing.
   * @throws IOException
   */
  void purge() throws IOException;

  /**
   * Diagnostic method to dump all INodes to the console.
   * @throws IOException
   */
  void dump() throws IOException;
}
|
|
@ -0,0 +1,117 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Holds file metadata including type (regular file, or directory),
|
||||
* and the list of blocks that are pointers to the data.
|
||||
*/
|
||||
public class INode {
|
||||
|
||||
enum FileType {
|
||||
DIRECTORY, FILE
|
||||
}
|
||||
|
||||
public static final FileType[] FILE_TYPES = {
|
||||
FileType.DIRECTORY,
|
||||
FileType.FILE
|
||||
};
|
||||
|
||||
public static final INode DIRECTORY_INODE = new INode(FileType.DIRECTORY, null);
|
||||
|
||||
private FileType fileType;
|
||||
private Block[] blocks;
|
||||
|
||||
public INode(FileType fileType, Block[] blocks) {
|
||||
this.fileType = fileType;
|
||||
if (isDirectory() && blocks != null) {
|
||||
throw new IllegalArgumentException("A directory cannot contain blocks.");
|
||||
}
|
||||
this.blocks = blocks;
|
||||
}
|
||||
|
||||
public Block[] getBlocks() {
|
||||
return blocks;
|
||||
}
|
||||
|
||||
public FileType getFileType() {
|
||||
return fileType;
|
||||
}
|
||||
|
||||
public boolean isDirectory() {
|
||||
return fileType == FileType.DIRECTORY;
|
||||
}
|
||||
|
||||
public boolean isFile() {
|
||||
return fileType == FileType.FILE;
|
||||
}
|
||||
|
||||
public long getSerializedLength() {
|
||||
return 1L + (blocks == null ? 0 : 4 + blocks.length * 16);
|
||||
}
|
||||
|
||||
|
||||
public InputStream serialize() throws IOException {
|
||||
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
||||
DataOutputStream out = new DataOutputStream(bytes);
|
||||
out.writeByte(fileType.ordinal());
|
||||
if (isFile()) {
|
||||
out.writeInt(blocks.length);
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
out.writeLong(blocks[i].getId());
|
||||
out.writeLong(blocks[i].getLength());
|
||||
}
|
||||
}
|
||||
out.close();
|
||||
return new ByteArrayInputStream(bytes.toByteArray());
|
||||
}
|
||||
|
||||
public static INode deserialize(InputStream in) throws IOException {
|
||||
if (in == null) {
|
||||
return null;
|
||||
}
|
||||
DataInputStream dataIn = new DataInputStream(in);
|
||||
FileType fileType = INode.FILE_TYPES[dataIn.readByte()];
|
||||
switch (fileType) {
|
||||
case DIRECTORY:
|
||||
in.close();
|
||||
return INode.DIRECTORY_INODE;
|
||||
case FILE:
|
||||
int numBlocks = dataIn.readInt();
|
||||
Block[] blocks = new Block[numBlocks];
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
long id = dataIn.readLong();
|
||||
long length = dataIn.readLong();
|
||||
blocks[i] = new Block(id, length);
|
||||
}
|
||||
in.close();
|
||||
return new INode(fileType, blocks);
|
||||
default:
|
||||
throw new IllegalArgumentException("Cannot deserialize inode.");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,390 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.s3.INode.FileType;
|
||||
import org.jets3t.service.S3Service;
|
||||
import org.jets3t.service.S3ServiceException;
|
||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||
import org.jets3t.service.model.S3Bucket;
|
||||
import org.jets3t.service.model.S3Object;
|
||||
import org.jets3t.service.security.AWSCredentials;
|
||||
|
||||
/**
 * A {@link FileSystemStore} backed by Amazon S3 via the JetS3t library.
 * Inodes and blocks are stored as individual S3 objects; every inode object
 * is stamped with metadata identifying the filesystem name, type and version
 * so that foreign objects can be rejected by {@link #checkMetadata}.
 */
class Jets3tFileSystemStore implements FileSystemStore {

  // Metadata key/value pairs written onto every inode object and verified
  // on read by checkMetadata().
  private static final String FILE_SYSTEM_NAME = "fs";
  private static final String FILE_SYSTEM_VALUE = "Hadoop";

  private static final String FILE_SYSTEM_TYPE_NAME = "fs-type";
  private static final String FILE_SYSTEM_TYPE_VALUE = "block";

  private static final String FILE_SYSTEM_VERSION_NAME = "fs-version";
  private static final String FILE_SYSTEM_VERSION_VALUE = "1";

  private static final Map<String, String> METADATA =
    new HashMap<String, String>();

  static {
    METADATA.put(FILE_SYSTEM_NAME, FILE_SYSTEM_VALUE);
    METADATA.put(FILE_SYSTEM_TYPE_NAME, FILE_SYSTEM_TYPE_VALUE);
    METADATA.put(FILE_SYSTEM_VERSION_NAME, FILE_SYSTEM_VERSION_VALUE);
  }

  private static final String PATH_DELIMITER = Path.SEPARATOR;
  // S3 key prefix distinguishing block objects from inode objects
  private static final String BLOCK_PREFIX = "block_";

  private Configuration conf;

  private S3Service s3Service;

  private S3Bucket bucket;

  // copy-buffer size for retrieveBlock, from io.file.buffer.size
  private int bufferSize;

  private static final Log LOG =
    LogFactory.getLog(Jets3tFileSystemStore.class.getName());

  /**
   * Connect to S3 using credentials taken from the URI or configuration;
   * the bucket name is the URI's host component.
   */
  public void initialize(URI uri, Configuration conf) throws IOException {

    this.conf = conf;

    S3Credentials s3Credentials = new S3Credentials();
    s3Credentials.initialize(uri, conf);
    try {
      AWSCredentials awsCredentials =
        new AWSCredentials(s3Credentials.getAccessKey(),
            s3Credentials.getSecretAccessKey());
      this.s3Service = new RestS3Service(awsCredentials);
    } catch (S3ServiceException e) {
      // unwrap the underlying IOException if there is one, otherwise
      // rethrow as an unchecked S3Exception (pattern used throughout)
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
    bucket = new S3Bucket(uri.getHost());

    this.bufferSize = conf.getInt("io.file.buffer.size", 4096);
  }

  /** The data-format version this implementation writes. */
  public String getVersion() throws IOException {
    return FILE_SYSTEM_VERSION_VALUE;
  }

  /** Delete the S3 object stored under {@code key}. */
  private void delete(String key) throws IOException {
    try {
      s3Service.deleteObject(bucket, key);
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  public void deleteINode(Path path) throws IOException {
    delete(pathToKey(path));
  }

  public void deleteBlock(Block block) throws IOException {
    delete(blockToKey(block));
  }

  /** Existence check implemented by opening and immediately closing the object. */
  public boolean inodeExists(Path path) throws IOException {
    InputStream in = get(pathToKey(path), true);
    if (in == null) {
      return false;
    }
    in.close();
    return true;
  }

  public boolean blockExists(long blockId) throws IOException {
    InputStream in = get(blockToKey(blockId), false);
    if (in == null) {
      return false;
    }
    in.close();
    return true;
  }

  /**
   * Fetch the object stored under {@code key}.
   * @param checkMetadata if true, verify the object was written by this
   *        filesystem (name/type/version metadata)
   * @return the object's data stream, or null if the key does not exist
   */
  private InputStream get(String key, boolean checkMetadata)
      throws IOException {

    try {
      S3Object object = s3Service.getObject(bucket, key);
      if (checkMetadata) {
        checkMetadata(object);
      }
      return object.getDataInputStream();
    } catch (S3ServiceException e) {
      // a missing key is an expected condition, not an error
      if ("NoSuchKey".equals(e.getS3ErrorCode())) {
        return null;
      }
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  /**
   * Fetch the object stored under {@code key}, starting at byte offset
   * {@code byteRangeStart}; returns null if the key does not exist.
   */
  private InputStream get(String key, long byteRangeStart) throws IOException {
    try {
      S3Object object = s3Service.getObject(bucket, key, null, null, null,
                                            null, byteRangeStart, null);
      return object.getDataInputStream();
    } catch (S3ServiceException e) {
      if ("NoSuchKey".equals(e.getS3ErrorCode())) {
        return null;
      }
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  /**
   * Reject objects that were not written by this filesystem implementation
   * or that use an incompatible data-format version.
   */
  private void checkMetadata(S3Object object) throws S3FileSystemException,
      S3ServiceException {

    String name = (String) object.getMetadata(FILE_SYSTEM_NAME);
    if (!FILE_SYSTEM_VALUE.equals(name)) {
      throw new S3FileSystemException("Not a Hadoop S3 file.");
    }
    String type = (String) object.getMetadata(FILE_SYSTEM_TYPE_NAME);
    if (!FILE_SYSTEM_TYPE_VALUE.equals(type)) {
      throw new S3FileSystemException("Not a block file.");
    }
    String dataVersion = (String) object.getMetadata(FILE_SYSTEM_VERSION_NAME);
    if (!FILE_SYSTEM_VERSION_VALUE.equals(dataVersion)) {
      throw new VersionMismatchException(FILE_SYSTEM_VERSION_VALUE,
          dataVersion);
    }
  }

  /** Fetch and deserialize the inode at {@code path}, or null if absent. */
  public INode retrieveINode(Path path) throws IOException {
    return INode.deserialize(get(pathToKey(path), true));
  }

  /**
   * Copy a block's data (from {@code byteRangeStart} onward) into a fresh
   * local temp file and return it; on failure the partial file is deleted.
   * NOTE(review): if the block object is missing, get() returns null and
   * in.read() below would throw NullPointerException rather than a clear
   * IOException — confirm callers guarantee the block exists.
   */
  public File retrieveBlock(Block block, long byteRangeStart)
      throws IOException {
    File fileBlock = null;
    InputStream in = null;
    OutputStream out = null;
    try {
      fileBlock = newBackupFile();
      in = get(blockToKey(block), byteRangeStart);
      out = new BufferedOutputStream(new FileOutputStream(fileBlock));
      byte[] buf = new byte[bufferSize];
      int numRead;
      while ((numRead = in.read(buf)) >= 0) {
        out.write(buf, 0, numRead);
      }
      return fileBlock;
    } catch (IOException e) {
      // close output stream to file then delete file
      closeQuietly(out);
      out = null; // to prevent a second close
      if (fileBlock != null) {
        boolean b = fileBlock.delete();
        if (!b) {
          LOG.warn("Ignoring failed delete");
        }
      }
      throw e;
    } finally {
      closeQuietly(out);
      closeQuietly(in);
    }
  }

  /** Create a delete-on-exit temp file under fs.s3.buffer.dir. */
  private File newBackupFile() throws IOException {
    File dir = new File(conf.get("fs.s3.buffer.dir"));
    if (!dir.exists() && !dir.mkdirs()) {
      throw new IOException("Cannot create S3 buffer directory: " + dir);
    }
    File result = File.createTempFile("input-", ".tmp", dir);
    result.deleteOnExit();
    return result;
  }

  /** List the immediate children of {@code path} (delimiter-bounded listing). */
  public Set<Path> listSubPaths(Path path) throws IOException {
    try {
      String prefix = pathToKey(path);
      if (!prefix.endsWith(PATH_DELIMITER)) {
        prefix += PATH_DELIMITER;
      }
      S3Object[] objects = s3Service.listObjects(bucket, prefix, PATH_DELIMITER);
      Set<Path> prefixes = new TreeSet<Path>();
      for (int i = 0; i < objects.length; i++) {
        prefixes.add(keyToPath(objects[i].getKey()));
      }
      // the listing can include the directory itself; exclude it
      prefixes.remove(path);
      return prefixes;
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  /** List all descendants of {@code path} (no delimiter, so fully recursive). */
  public Set<Path> listDeepSubPaths(Path path) throws IOException {
    try {
      String prefix = pathToKey(path);
      if (!prefix.endsWith(PATH_DELIMITER)) {
        prefix += PATH_DELIMITER;
      }
      S3Object[] objects = s3Service.listObjects(bucket, prefix, null);
      Set<Path> prefixes = new TreeSet<Path>();
      for (int i = 0; i < objects.length; i++) {
        prefixes.add(keyToPath(objects[i].getKey()));
      }
      prefixes.remove(path);
      return prefixes;
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  /**
   * Upload {@code length} bytes from {@code in} under {@code key}.
   * @param storeMetadata if true, stamp the object with the filesystem
   *        identification metadata (used for inodes, not raw blocks)
   */
  private void put(String key, InputStream in, long length, boolean storeMetadata)
      throws IOException {

    try {
      S3Object object = new S3Object(key);
      object.setDataInputStream(in);
      object.setContentType("binary/octet-stream");
      object.setContentLength(length);
      if (storeMetadata) {
        object.addAllMetadata(METADATA);
      }
      s3Service.putObject(bucket, object);
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  public void storeINode(Path path, INode inode) throws IOException {
    put(pathToKey(path), inode.serialize(), inode.getSerializedLength(), true);
  }

  public void storeBlock(Block block, File file) throws IOException {
    BufferedInputStream in = null;
    try {
      in = new BufferedInputStream(new FileInputStream(file));
      put(blockToKey(block), in, block.getLength(), false);
    } finally {
      closeQuietly(in);
    }
  }

  /** Close a resource, swallowing any IOException (used in cleanup paths). */
  private void closeQuietly(Closeable closeable) {
    if (closeable != null) {
      try {
        closeable.close();
      } catch (IOException e) {
        // ignore
      }
    }
  }

  /** Map an absolute path to its S3 key (the raw URI path string). */
  private String pathToKey(Path path) {
    if (!path.isAbsolute()) {
      throw new IllegalArgumentException("Path must be absolute: " + path);
    }
    return path.toUri().getPath();
  }

  private Path keyToPath(String key) {
    return new Path(key);
  }

  /** Map a block id to its S3 key, e.g. 7 -> "block_7". */
  private String blockToKey(long blockId) {
    return BLOCK_PREFIX + blockId;
  }

  private String blockToKey(Block block) {
    return blockToKey(block.getId());
  }

  /** Delete every object in the bucket. Used for testing. */
  public void purge() throws IOException {
    try {
      S3Object[] objects = s3Service.listObjects(bucket);
      for (int i = 0; i < objects.length; i++) {
        s3Service.deleteObject(bucket, objects[i].getKey());
      }
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
  }

  /** Dump every inode (and its blocks) to stdout, for diagnostics. */
  public void dump() throws IOException {
    StringBuilder sb = new StringBuilder("S3 Filesystem, ");
    sb.append(bucket.getName()).append("\n");
    try {
      S3Object[] objects = s3Service.listObjects(bucket, PATH_DELIMITER, null);
      for (int i = 0; i < objects.length; i++) {
        Path path = keyToPath(objects[i].getKey());
        sb.append(path).append("\n");
        INode m = retrieveINode(path);
        sb.append("\t").append(m.getFileType()).append("\n");
        if (m.getFileType() == FileType.DIRECTORY) {
          continue;
        }
        for (int j = 0; j < m.getBlocks().length; j++) {
          sb.append("\t").append(m.getBlocks()[j]).append("\n");
        }
      }
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
    System.out.println(sb);
  }

}
|
|
@ -0,0 +1,280 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URI;
|
||||
import java.net.URLDecoder;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.jets3t.service.S3Service;
|
||||
import org.jets3t.service.S3ServiceException;
|
||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||
import org.jets3t.service.model.S3Bucket;
|
||||
import org.jets3t.service.model.S3Object;
|
||||
import org.jets3t.service.security.AWSCredentials;
|
||||
|
||||
/**
 * <p>
 * This class is a tool for migrating data from an older to a newer version
 * of an S3 filesystem.
 * </p>
 * <p>
 * All files in the filesystem are migrated by re-writing the block metadata
 * - no datafiles are touched.
 * </p>
 */
public class MigrationTool extends Configured implements Tool {

  private S3Service s3Service;
  private S3Bucket bucket;

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new MigrationTool(), args);
    System.exit(res);
  }

  /**
   * Entry point: detect the filesystem version stored in the bucket named
   * by args[0] and migrate it to the current format when supported.
   * @return 0 on success or no-op, -1 on usage error
   */
  public int run(String[] args) throws Exception {

    if (args.length == 0) {
      System.err.println("Usage: MigrationTool <S3 file system URI>");
      System.err.println("\t<S3 file system URI>\tfilesystem to migrate");
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    URI uri = URI.create(args[0]);

    initialize(uri);

    FileSystemStore newStore = new Jets3tFileSystemStore();
    newStore.initialize(uri, getConf());

    // "%2F" is the URL-encoded root key written by the unversioned format;
    // its presence identifies a pre-versioning filesystem
    if (get("%2F") != null) {
      System.err.println("Current version number is [unversioned].");
      System.err.println("Target version number is " +
          newStore.getVersion() + ".");
      Store oldStore = new UnversionedStore();
      migrate(oldStore, newStore);
      return 0;
    } else {
      // versioned format stores its version in the root object's metadata
      S3Object root = get("/");
      if (root != null) {
        String version = (String) root.getMetadata("fs-version");
        if (version == null) {
          System.err.println("Can't detect version - exiting.");
        } else {
          String newVersion = newStore.getVersion();
          System.err.println("Current version number is " + version + ".");
          System.err.println("Target version number is " + newVersion + ".");
          if (version.equals(newStore.getVersion())) {
            System.err.println("No migration required.");
            return 0;
          }
          // use version number to create Store
          //Store oldStore = ...
          //migrate(oldStore, newStore);
          System.err.println("Not currently implemented.");
          return 0;
        }
      }
      System.err.println("Can't detect version - exiting.");
      return 0;
    }

  }

  /**
   * Connect to S3; credentials come from the URI's userinfo component or,
   * failing that, from the fs.s3.awsAccessKeyId / fs.s3.awsSecretAccessKey
   * configuration properties. The bucket name is the URI's host.
   */
  public void initialize(URI uri) throws IOException {



    try {
      String accessKey = null;
      String secretAccessKey = null;
      String userInfo = uri.getUserInfo();
      if (userInfo != null) {
        // userinfo is "accessKey:secretKey" or just "accessKey"
        int index = userInfo.indexOf(':');
        if (index != -1) {
          accessKey = userInfo.substring(0, index);
          secretAccessKey = userInfo.substring(index + 1);
        } else {
          accessKey = userInfo;
        }
      }
      if (accessKey == null) {
        accessKey = getConf().get("fs.s3.awsAccessKeyId");
      }
      if (secretAccessKey == null) {
        secretAccessKey = getConf().get("fs.s3.awsSecretAccessKey");
      }
      if (accessKey == null && secretAccessKey == null) {
        throw new IllegalArgumentException("AWS " +
            "Access Key ID and Secret Access Key " +
            "must be specified as the username " +
            "or password (respectively) of a s3 URL, " +
            "or by setting the " +
            "fs.s3.awsAccessKeyId or " +
            "fs.s3.awsSecretAccessKey properties (respectively).");
      } else if (accessKey == null) {
        throw new IllegalArgumentException("AWS " +
            "Access Key ID must be specified " +
            "as the username of a s3 URL, or by setting the " +
            "fs.s3.awsAccessKeyId property.");
      } else if (secretAccessKey == null) {
        throw new IllegalArgumentException("AWS " +
            "Secret Access Key must be specified " +
            "as the password of a s3 URL, or by setting the " +
            "fs.s3.awsSecretAccessKey property.");
      }
      AWSCredentials awsCredentials =
        new AWSCredentials(accessKey, secretAccessKey);
      this.s3Service = new RestS3Service(awsCredentials);
    } catch (S3ServiceException e) {
      if (e.getCause() instanceof IOException) {
        throw (IOException) e.getCause();
      }
      throw new S3Exception(e);
    }
    bucket = new S3Bucket(uri.getHost());
  }

  /**
   * Re-write every inode from the old store into the new store.
   * NOTE(review): the old inode is deleted before the new one is stored,
   * so a failure mid-migration can lose metadata — confirm this ordering
   * is intentional.
   */
  private void migrate(Store oldStore, FileSystemStore newStore)
      throws IOException {
    for (Path path : oldStore.listAllPaths()) {
      INode inode = oldStore.retrieveINode(path);
      oldStore.deleteINode(path);
      newStore.storeINode(path, inode);
    }
  }

  /**
   * Best-effort fetch of the object stored under {@code key}; null if absent.
   * NOTE(review): any S3ServiceException other than NoSuchKey is also
   * silently turned into null — a transient service error is therefore
   * indistinguishable from a missing key. Confirm this is acceptable for
   * version probing.
   */
  private S3Object get(String key) {
    try {
      return s3Service.getObject(bucket, key);
    } catch (S3ServiceException e) {
      if ("NoSuchKey".equals(e.getS3ErrorCode())) {
        return null;
      }
    }
    return null;
  }

  /** Minimal read/delete view of an old-format store, for migration. */
  interface Store {

    Set<Path> listAllPaths() throws IOException;
    INode retrieveINode(Path path) throws IOException;
    void deleteINode(Path path) throws IOException;

  }

  /**
   * Adapter for the original, unversioned S3 filesystem layout, in which
   * keys were URL-encoded path strings and objects carried no metadata.
   */
  class UnversionedStore implements Store {

    public Set<Path> listAllPaths() throws IOException {
      try {
        String prefix = urlEncode(Path.SEPARATOR);
        S3Object[] objects = s3Service.listObjects(bucket, prefix, null);
        Set<Path> prefixes = new TreeSet<Path>();
        for (int i = 0; i < objects.length; i++) {
          prefixes.add(keyToPath(objects[i].getKey()));
        }
        return prefixes;
      } catch (S3ServiceException e) {
        if (e.getCause() instanceof IOException) {
          throw (IOException) e.getCause();
        }
        throw new S3Exception(e);
      }
    }

    public void deleteINode(Path path) throws IOException {
      delete(pathToKey(path));
    }

    private void delete(String key) throws IOException {
      try {
        s3Service.deleteObject(bucket, key);
      } catch (S3ServiceException e) {
        if (e.getCause() instanceof IOException) {
          throw (IOException) e.getCause();
        }
        throw new S3Exception(e);
      }
    }

    public INode retrieveINode(Path path) throws IOException {
      return INode.deserialize(get(pathToKey(path)));
    }

    /** Fetch the object stream stored under {@code key}, or null if absent. */
    private InputStream get(String key) throws IOException {
      try {
        S3Object object = s3Service.getObject(bucket, key);
        return object.getDataInputStream();
      } catch (S3ServiceException e) {
        if ("NoSuchKey".equals(e.getS3ErrorCode())) {
          return null;
        }
        if (e.getCause() instanceof IOException) {
          throw (IOException) e.getCause();
        }
        throw new S3Exception(e);
      }
    }

    /** Old-format key: the URL-encoded absolute path string. */
    private String pathToKey(Path path) {
      if (!path.isAbsolute()) {
        throw new IllegalArgumentException("Path must be absolute: " + path);
      }
      return urlEncode(path.toUri().getPath());
    }

    private Path keyToPath(String key) {
      return new Path(urlDecode(key));
    }

    private String urlEncode(String s) {
      try {
        return URLEncoder.encode(s, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Should never happen since every implementation of the Java Platform
        // is required to support UTF-8.
        // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html
        throw new IllegalStateException(e);
      }
    }

    private String urlDecode(String s) {
      try {
        return URLDecoder.decode(s, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Should never happen since every implementation of the Java Platform
        // is required to support UTF-8.
        // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html
        throw new IllegalStateException(e);
      }
    }

  }

}
|
|
@ -0,0 +1,99 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Extracts AWS credentials from the filesystem URI or configuration.
|
||||
* </p>
|
||||
*/
|
||||
public class S3Credentials {

  // AWS access key ID; resolved from the URI userInfo first, then from
  // the fs.<scheme>.awsAccessKeyId configuration property.
  private String accessKey;
  // AWS secret access key; resolved from the URI userInfo first, then from
  // the fs.<scheme>.awsSecretAccessKey configuration property.
  private String secretAccessKey;

  /**
   * Resolves the AWS credentials for the given filesystem URI.
   *
   * <p>Credentials embedded in the URI as
   * <code>scheme://accessKey:secretKey@bucket</code> take precedence over
   * the per-scheme configuration properties; each of the two values falls
   * back to configuration independently.</p>
   *
   * <p>NOTE(review): credentials carried in a URI can end up in logs and
   * shell history — prefer the configuration properties; verify downstream
   * logging before relying on URI-embedded secrets.</p>
   *
   * @param uri  filesystem URI, e.g. s3://bucket/; must have a host
   * @param conf configuration consulted for any value the URI does not supply
   * @throws IllegalArgumentException if credentials for S3 cannot be
   *         determined.
   */
  public void initialize(URI uri, Configuration conf) {
    if (uri.getHost() == null) {
      throw new IllegalArgumentException("Invalid hostname in URI " + uri);
    }

    String userInfo = uri.getUserInfo();
    if (userInfo != null) {
      int index = userInfo.indexOf(':');
      if (index != -1) {
        // "accessKey:secretKey" form.
        accessKey = userInfo.substring(0, index);
        secretAccessKey = userInfo.substring(index + 1);
      } else {
        // Only the access key was supplied; the secret comes from conf.
        accessKey = userInfo;
      }
    }

    // Property names are derived from the URI scheme so s3 and s3n (etc.)
    // can carry distinct credentials.
    String scheme = uri.getScheme();
    String accessKeyProperty = String.format("fs.%s.awsAccessKeyId", scheme);
    String secretAccessKeyProperty =
      String.format("fs.%s.awsSecretAccessKey", scheme);
    if (accessKey == null) {
      accessKey = conf.get(accessKeyProperty);
    }
    if (secretAccessKey == null) {
      secretAccessKey = conf.get(secretAccessKeyProperty);
    }
    // Tailor the error message to exactly which value is missing.
    if (accessKey == null && secretAccessKey == null) {
      throw new IllegalArgumentException("AWS " +
                                         "Access Key ID and Secret Access " +
                                         "Key must be specified as the " +
                                         "username or password " +
                                         "(respectively) of a " + scheme +
                                         " URL, or by setting the " +
                                         accessKeyProperty + " or " +
                                         secretAccessKeyProperty +
                                         " properties (respectively).");
    } else if (accessKey == null) {
      throw new IllegalArgumentException("AWS " +
                                         "Access Key ID must be specified " +
                                         "as the username of a " + scheme +
                                         " URL, or by setting the " +
                                         accessKeyProperty + " property.");
    } else if (secretAccessKey == null) {
      throw new IllegalArgumentException("AWS " +
                                         "Secret Access Key must be " +
                                         "specified as the password of a " +
                                         scheme + " URL, or by setting the " +
                                         secretAccessKeyProperty +
                                         " property.");
    }

  }

  /** @return the resolved AWS access key ID; non-null after initialize(). */
  public String getAccessKey() {
    return accessKey;
  }

  /** @return the resolved AWS secret key; non-null after initialize(). */
  public String getSecretAccessKey() {
    return secretAccessKey;
  }
}
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Thrown if there is a problem communicating with Amazon S3.
|
||||
*/
|
||||
/**
 * Thrown if there is a problem communicating with Amazon S3.
 *
 * <p>Extends {@link IOException} so that generic filesystem retry and
 * error-handling code treats S3 failures like any other I/O failure.</p>
 */
public class S3Exception extends IOException {

  private static final long serialVersionUID = 1L;

  /**
   * @param cause the underlying S3 service failure; preserved so callers
   *        can inspect the original error
   */
  public S3Exception(Throwable cause) {
    super(cause);
  }

}
|
|
@ -0,0 +1,361 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
|
||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||
import org.apache.hadoop.io.retry.RetryProxy;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A block-based {@link FileSystem} backed by
|
||||
* <a href="http://aws.amazon.com/s3">Amazon S3</a>.
|
||||
* </p>
|
||||
* @see NativeS3FileSystem
|
||||
*/
|
||||
public class S3FileSystem extends FileSystem {

  // Canonical URI (scheme + authority only) of this filesystem instance.
  private URI uri;

  // Backing store; either injected via the constructor (tests) or a
  // retry-wrapped Jets3tFileSystemStore built in initialize().
  private FileSystemStore store;

  private Path workingDir;

  public S3FileSystem() {
    // set store in initialize()
  }

  /** Constructor taking an explicit store, used for testing/injection. */
  public S3FileSystem(FileSystemStore store) {
    this.store = store;
  }

  @Override
  public URI getUri() {
    return uri;
  }

  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    if (store == null) {
      store = createDefaultStore(conf);
    }
    store.initialize(uri, conf);
    setConf(conf);
    // Strip path/userInfo: only scheme and authority identify the filesystem.
    this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
    this.workingDir =
      new Path("/user", System.getProperty("user.name")).makeQualified(this);
  }

  /**
   * Builds the default store: a Jets3tFileSystemStore wrapped in a retry
   * proxy so transient S3 failures of storeBlock/retrieveBlock are retried
   * (fs.s3.maxRetries attempts, fs.s3.sleepTimeSeconds apart).
   */
  private static FileSystemStore createDefaultStore(Configuration conf) {
    FileSystemStore store = new Jets3tFileSystemStore();

    RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
        conf.getInt("fs.s3.maxRetries", 4),
        conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS);
    Map<Class<? extends Exception>,RetryPolicy> exceptionToPolicyMap =
      new HashMap<Class<? extends Exception>, RetryPolicy>();
    exceptionToPolicyMap.put(IOException.class, basePolicy);
    exceptionToPolicyMap.put(S3Exception.class, basePolicy);

    RetryPolicy methodPolicy = RetryPolicies.retryByException(
        RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
    Map<String,RetryPolicy> methodNameToPolicyMap = new HashMap<String,RetryPolicy>();
    methodNameToPolicyMap.put("storeBlock", methodPolicy);
    methodNameToPolicyMap.put("retrieveBlock", methodPolicy);

    return (FileSystemStore) RetryProxy.create(FileSystemStore.class,
        store, methodNameToPolicyMap);
  }

  @Override
  public Path getWorkingDirectory() {
    return workingDir;
  }

  @Override
  public void setWorkingDirectory(Path dir) {
    workingDir = makeAbsolute(dir);
  }

  /** Resolves a possibly-relative path against the working directory. */
  private Path makeAbsolute(Path path) {
    if (path.isAbsolute()) {
      return path;
    }
    return new Path(workingDir, path);
  }

  /**
   * Creates the directory and all missing ancestors, root first.
   *
   * @param permission Currently ignored.
   */
  @Override
  public boolean mkdirs(Path path, FsPermission permission) throws IOException {
    Path absolutePath = makeAbsolute(path);
    // Collect the path and every ancestor, ordered root-first.
    List<Path> paths = new ArrayList<Path>();
    do {
      paths.add(0, absolutePath);
      absolutePath = absolutePath.getParent();
    } while (absolutePath != null);

    boolean result = true;
    for (Path p : paths) {
      result &= mkdir(p);
    }
    return result;
  }

  /**
   * Creates a single directory inode if absent; a no-op for an existing
   * directory, an error for an existing file.
   */
  private boolean mkdir(Path path) throws IOException {
    Path absolutePath = makeAbsolute(path);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
      store.storeINode(absolutePath, INode.DIRECTORY_INODE);
    } else if (inode.isFile()) {
      throw new IOException(String.format(
          "Can't make directory for path %s since it is a file.",
          absolutePath));
    }
    return true;
  }

  @Override
  public boolean isFile(Path path) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(path));
    if (inode == null) {
      return false;
    }
    return inode.isFile();
  }

  /**
   * Retrieves the inode for a path that must be an existing file.
   *
   * @throws IOException if the path is missing or is a directory
   */
  private INode checkFile(Path path) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(path));
    if (inode == null) {
      throw new IOException("No such file.");
    }
    if (inode.isDirectory()) {
      throw new IOException("Path " + path + " is a directory.");
    }
    return inode;
  }

  /**
   * Lists a directory, or returns a single-element array for a file.
   * Returns null (rather than throwing) when the path does not exist —
   * callers such as delete() rely on this.
   */
  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    Path absolutePath = makeAbsolute(f);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
      return null;
    }
    if (inode.isFile()) {
      return new FileStatus[] {
        new S3FileStatus(f.makeQualified(this), inode)
      };
    }
    ArrayList<FileStatus> ret = new ArrayList<FileStatus>();
    for (Path p : store.listSubPaths(absolutePath)) {
      ret.add(getFileStatus(p.makeQualified(this)));
    }
    return ret.toArray(new FileStatus[0]);
  }

  /** This optional operation is not yet supported. */
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException {
    throw new IOException("Not supported");
  }

  /**
   * Creates a file, overwriting or failing per the overwrite flag, and
   * creating any missing parent directories first.
   *
   * @param permission Currently ignored.
   */
  @Override
  public FSDataOutputStream create(Path file, FsPermission permission,
      boolean overwrite, int bufferSize,
      short replication, long blockSize, Progressable progress)
    throws IOException {

    INode inode = store.retrieveINode(makeAbsolute(file));
    if (inode != null) {
      if (overwrite) {
        delete(file, true);
      } else {
        throw new IOException("File already exists: " + file);
      }
    } else {
      Path parent = file.getParent();
      if (parent != null) {
        if (!mkdirs(parent)) {
          throw new IOException("Mkdirs failed to create " + parent.toString());
        }
      }
    }
    return new FSDataOutputStream
        (new S3OutputStream(getConf(), store, makeAbsolute(file),
                            blockSize, progress, bufferSize),
         statistics);
  }

  @Override
  public FSDataInputStream open(Path path, int bufferSize) throws IOException {
    INode inode = checkFile(path);
    return new FSDataInputStream(new S3InputStream(getConf(), store, inode,
                                                   statistics));
  }

  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    Path absoluteSrc = makeAbsolute(src);
    INode srcINode = store.retrieveINode(absoluteSrc);
    if (srcINode == null) {
      // src path doesn't exist
      return false;
    }
    Path absoluteDst = makeAbsolute(dst);
    INode dstINode = store.retrieveINode(absoluteDst);
    if (dstINode != null && dstINode.isDirectory()) {
      // Renaming into an existing directory moves src underneath it.
      absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
      dstINode = store.retrieveINode(absoluteDst);
    }
    if (dstINode != null) {
      // dst path already exists - can't overwrite
      return false;
    }
    Path dstParent = absoluteDst.getParent();
    if (dstParent != null) {
      INode dstParentINode = store.retrieveINode(dstParent);
      if (dstParentINode == null || dstParentINode.isFile()) {
        // dst parent doesn't exist or is a file
        return false;
      }
    }
    return renameRecursive(absoluteSrc, absoluteDst);
  }

  /**
   * Moves an inode (and, for directories, every descendant) by rewriting
   * each inode record under the new key and deleting the old one.
   * NOTE(review): not atomic — a failure mid-way leaves a partial rename.
   */
  private boolean renameRecursive(Path src, Path dst) throws IOException {
    INode srcINode = store.retrieveINode(src);
    store.storeINode(dst, srcINode);
    store.deleteINode(src);
    if (srcINode.isDirectory()) {
      for (Path oldSrc : store.listDeepSubPaths(src)) {
        INode inode = store.retrieveINode(oldSrc);
        if (inode == null) {
          return false;
        }
        String oldSrcPath = oldSrc.toUri().getPath();
        String srcPath = src.toUri().getPath();
        String dstPath = dst.toUri().getPath();
        Path newDst = new Path(oldSrcPath.replaceFirst(srcPath, dstPath));
        store.storeINode(newDst, inode);
        store.deleteINode(oldSrc);
      }
    }
    return true;
  }

  /**
   * Deletes a file (inode plus its data blocks) or a directory. A non-empty
   * directory is only removed when recursive is true; its children are
   * deleted depth-first before the directory inode itself.
   */
  public boolean delete(Path path, boolean recursive) throws IOException {
    Path absolutePath = makeAbsolute(path);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
      return false;
    }
    if (inode.isFile()) {
      store.deleteINode(absolutePath);
      for (Block block: inode.getBlocks()) {
        store.deleteBlock(block);
      }
    } else {
      FileStatus[] contents = listStatus(absolutePath);
      if (contents == null) {
        return false;
      }
      if ((contents.length !=0) && (!recursive)) {
        throw new IOException("Directory " + path.toString()
            + " is not empty.");
      }
      for (FileStatus p:contents) {
        if (!delete(p.getPath(), recursive)) {
          return false;
        }
      }
      store.deleteINode(absolutePath);
    }
    return true;
  }

  /**
   * Returns the status of a file or directory.
   *
   * @throws FileNotFoundException if the path does not exist
   */
  @Override
  public FileStatus getFileStatus(Path f) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(f));
    if (inode == null) {
      throw new FileNotFoundException(f + ": No such file or directory.");
    }
    return new S3FileStatus(f.makeQualified(this), inode);
  }

  // diagnostic methods

  void dump() throws IOException {
    store.dump();
  }

  void purge() throws IOException {
    store.purge();
  }

  /** FileStatus for S3 file systems: derives length and block size from
   *  the inode's block list. */
  private static class S3FileStatus extends FileStatus {

    S3FileStatus(Path f, INode inode) throws IOException {
      super(findLength(inode), inode.isDirectory(), 1,
            findBlocksize(inode), 0, f);
    }

    // Sum of the block lengths for a file; directories report 0.
    private static long findLength(INode inode) {
      if (!inode.isDirectory()) {
        long length = 0L;
        for (Block block : inode.getBlocks()) {
          length += block.getLength();
        }
        return length;
      }
      return 0;
    }

    // Length of the first block, or 0 when there are no blocks.
    private static long findBlocksize(INode inode) {
      final Block[] ret = inode.getBlocks();
      return ret == null ? 0L : ret[0].getLength();
    }
  }
}
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Thrown when there is a fatal exception while using {@link S3FileSystem}.
|
||||
*/
|
||||
/**
 * Thrown when there is a fatal exception while using {@link S3FileSystem}.
 *
 * <p>Serves as the base type for S3 filesystem-level failures such as
 * on-disk format version mismatches.</p>
 */
public class S3FileSystemException extends IOException {
  private static final long serialVersionUID = 1L;

  /** @param message human-readable description of the fatal condition */
  public S3FileSystemException(String message) {
    super(message);
  }
}
|
|
@ -0,0 +1,211 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
class S3InputStream extends FSInputStream {
|
||||
|
||||
private FileSystemStore store;
|
||||
|
||||
private Block[] blocks;
|
||||
|
||||
private boolean closed;
|
||||
|
||||
private long fileLength;
|
||||
|
||||
private long pos = 0;
|
||||
|
||||
private File blockFile;
|
||||
|
||||
private DataInputStream blockStream;
|
||||
|
||||
private long blockEnd = -1;
|
||||
|
||||
private FileSystem.Statistics stats;
|
||||
|
||||
private static final Log LOG =
|
||||
LogFactory.getLog(S3InputStream.class.getName());
|
||||
|
||||
|
||||
@Deprecated
|
||||
public S3InputStream(Configuration conf, FileSystemStore store,
|
||||
INode inode) {
|
||||
this(conf, store, inode, null);
|
||||
}
|
||||
|
||||
public S3InputStream(Configuration conf, FileSystemStore store,
|
||||
INode inode, FileSystem.Statistics stats) {
|
||||
|
||||
this.store = store;
|
||||
this.stats = stats;
|
||||
this.blocks = inode.getBlocks();
|
||||
for (Block block : blocks) {
|
||||
this.fileLength += block.getLength();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long getPos() throws IOException {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int available() throws IOException {
|
||||
return (int) (fileLength - pos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void seek(long targetPos) throws IOException {
|
||||
if (targetPos > fileLength) {
|
||||
throw new IOException("Cannot seek after EOF");
|
||||
}
|
||||
pos = targetPos;
|
||||
blockEnd = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean seekToNewSource(long targetPos) throws IOException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read() throws IOException {
|
||||
if (closed) {
|
||||
throw new IOException("Stream closed");
|
||||
}
|
||||
int result = -1;
|
||||
if (pos < fileLength) {
|
||||
if (pos > blockEnd) {
|
||||
blockSeekTo(pos);
|
||||
}
|
||||
result = blockStream.read();
|
||||
if (result >= 0) {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
if (stats != null & result >= 0) {
|
||||
stats.incrementBytesRead(1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read(byte buf[], int off, int len) throws IOException {
|
||||
if (closed) {
|
||||
throw new IOException("Stream closed");
|
||||
}
|
||||
if (pos < fileLength) {
|
||||
if (pos > blockEnd) {
|
||||
blockSeekTo(pos);
|
||||
}
|
||||
int realLen = Math.min(len, (int) (blockEnd - pos + 1));
|
||||
int result = blockStream.read(buf, off, realLen);
|
||||
if (result >= 0) {
|
||||
pos += result;
|
||||
}
|
||||
if (stats != null && result > 0) {
|
||||
stats.incrementBytesRead(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private synchronized void blockSeekTo(long target) throws IOException {
|
||||
//
|
||||
// Compute desired block
|
||||
//
|
||||
int targetBlock = -1;
|
||||
long targetBlockStart = 0;
|
||||
long targetBlockEnd = 0;
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
long blockLength = blocks[i].getLength();
|
||||
targetBlockEnd = targetBlockStart + blockLength - 1;
|
||||
|
||||
if (target >= targetBlockStart && target <= targetBlockEnd) {
|
||||
targetBlock = i;
|
||||
break;
|
||||
} else {
|
||||
targetBlockStart = targetBlockEnd + 1;
|
||||
}
|
||||
}
|
||||
if (targetBlock < 0) {
|
||||
throw new IOException(
|
||||
"Impossible situation: could not find target position " + target);
|
||||
}
|
||||
long offsetIntoBlock = target - targetBlockStart;
|
||||
|
||||
// read block blocks[targetBlock] from position offsetIntoBlock
|
||||
|
||||
this.blockFile = store.retrieveBlock(blocks[targetBlock], offsetIntoBlock);
|
||||
|
||||
this.pos = target;
|
||||
this.blockEnd = targetBlockEnd;
|
||||
this.blockStream = new DataInputStream(new FileInputStream(blockFile));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
if (blockStream != null) {
|
||||
blockStream.close();
|
||||
blockStream = null;
|
||||
}
|
||||
if (blockFile != null) {
|
||||
boolean b = blockFile.delete();
|
||||
if (!b) {
|
||||
LOG.warn("Ignoring failed delete");
|
||||
}
|
||||
}
|
||||
super.close();
|
||||
closed = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* We don't support marks.
|
||||
*/
|
||||
@Override
|
||||
public boolean markSupported() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void mark(int readLimit) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
throw new IOException("Mark not supported");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,231 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.s3.INode.FileType;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
 * Output stream that buffers writes in memory, spools full blocks to a
 * local backup file, and uploads each completed block to S3. The file's
 * inode (list of blocks) is written on close.
 */
class S3OutputStream extends OutputStream {

  private Configuration conf;

  // Size of the in-memory buffer outBuf.
  private int bufferSize;

  private FileSystemStore store;

  // Destination path; its inode is stored on close.
  private Path path;

  private long blockSize;

  // Local spool file for the block currently being written.
  private File backupFile;

  private OutputStream backupStream;

  // Source of candidate block IDs; collisions are checked against the store.
  private Random r = new Random();

  private boolean closed;

  // Bytes currently held in outBuf.
  private int pos = 0;

  // Absolute write position in the file.
  private long filePos = 0;

  // Bytes already spooled to backupFile for the current block.
  private int bytesWrittenToBlock = 0;

  private byte[] outBuf;

  // Blocks uploaded so far, in file order.
  private List<Block> blocks = new ArrayList<Block>();

  private Block nextBlock;

  private static final Log LOG =
    LogFactory.getLog(S3OutputStream.class.getName());


  /**
   * @param conf       supplies fs.s3.buffer.dir for local spool files
   * @param path       destination path for the file's inode
   * @param blockSize  bytes per S3 block
   * @param progress   currently unused (see TODO in endBlock)
   * @param buffersize size of the in-memory write buffer
   */
  public S3OutputStream(Configuration conf, FileSystemStore store,
                        Path path, long blockSize, Progressable progress,
                        int buffersize) throws IOException {

    this.conf = conf;
    this.store = store;
    this.path = path;
    this.blockSize = blockSize;
    this.backupFile = newBackupFile();
    this.backupStream = new FileOutputStream(backupFile);
    this.bufferSize = buffersize;
    this.outBuf = new byte[bufferSize];

  }

  /** Creates a fresh temp file under fs.s3.buffer.dir for block spooling. */
  private File newBackupFile() throws IOException {
    File dir = new File(conf.get("fs.s3.buffer.dir"));
    if (!dir.exists() && !dir.mkdirs()) {
      throw new IOException("Cannot create S3 buffer directory: " + dir);
    }
    File result = File.createTempFile("output-", ".tmp", dir);
    result.deleteOnExit();
    return result;
  }

  public long getPos() throws IOException {
    return filePos;
  }

  @Override
  public synchronized void write(int b) throws IOException {
    if (closed) {
      throw new IOException("Stream closed");
    }

    // Flush when the buffer is full or the block boundary is reached.
    if ((bytesWrittenToBlock + pos == blockSize) || (pos >= bufferSize)) {
      flush();
    }
    outBuf[pos++] = (byte) b;
    filePos++;
  }

  @Override
  public synchronized void write(byte b[], int off, int len) throws IOException {
    if (closed) {
      throw new IOException("Stream closed");
    }
    while (len > 0) {
      int remaining = bufferSize - pos;
      int toWrite = Math.min(remaining, len);
      System.arraycopy(b, off, outBuf, pos, toWrite);
      pos += toWrite;
      off += toWrite;
      len -= toWrite;
      filePos += toWrite;

      if ((bytesWrittenToBlock + pos >= blockSize) || (pos == bufferSize)) {
        flush();
      }
    }
  }

  @Override
  public synchronized void flush() throws IOException {
    if (closed) {
      throw new IOException("Stream closed");
    }

    // Spool exactly enough to complete the current block, upload it,
    // then spool whatever remains into the new block.
    if (bytesWrittenToBlock + pos >= blockSize) {
      flushData((int) blockSize - bytesWrittenToBlock);
    }
    if (bytesWrittenToBlock == blockSize) {
      endBlock();
    }
    flushData(pos);
  }

  /**
   * Spools up to maxPos bytes from outBuf to the backup file and shifts
   * any unspooled remainder to the front of the buffer.
   */
  private synchronized void flushData(int maxPos) throws IOException {
    int workingPos = Math.min(pos, maxPos);

    if (workingPos > 0) {
      //
      // To the local block backup, write just the bytes
      //
      backupStream.write(outBuf, 0, workingPos);

      //
      // Track position
      //
      bytesWrittenToBlock += workingPos;
      System.arraycopy(outBuf, workingPos, outBuf, 0, pos - workingPos);
      pos -= workingPos;
    }
  }

  /**
   * Uploads the completed backup file as the next block, records the inode
   * so far, and starts a fresh backup file.
   */
  private synchronized void endBlock() throws IOException {
    //
    // Done with local copy
    //
    backupStream.close();

    //
    // Send it to S3
    //
    // TODO: Use passed in Progressable to report progress.
    nextBlockOutputStream();
    store.storeBlock(nextBlock, backupFile);
    internalClose();

    //
    // Delete local backup, start new one
    //
    boolean b = backupFile.delete();
    if (!b) {
      LOG.warn("Ignoring failed delete");
    }
    backupFile = newBackupFile();
    backupStream = new FileOutputStream(backupFile);
    bytesWrittenToBlock = 0;
  }

  /**
   * Allocates a new Block with a random ID not already present in the
   * store, sized to the bytes spooled so far.
   */
  private synchronized void nextBlockOutputStream() throws IOException {
    long blockId = r.nextLong();
    while (store.blockExists(blockId)) {
      blockId = r.nextLong();
    }
    nextBlock = new Block(blockId, bytesWrittenToBlock);
    blocks.add(nextBlock);
    bytesWrittenToBlock = 0;
  }

  /** Persists the inode (block list so far) for this path. */
  private synchronized void internalClose() throws IOException {
    INode inode = new INode(FileType.FILE, blocks.toArray(new Block[blocks
        .size()]));
    store.storeINode(path, inode);
  }

  @Override
  public synchronized void close() throws IOException {
    if (closed) {
      return;
    }

    flush();
    // An empty file still gets one (zero-length) block so an inode exists.
    if (filePos == 0 || bytesWrittenToBlock != 0) {
      endBlock();
    }

    backupStream.close();
    boolean b = backupFile.delete();
    if (!b) {
      LOG.warn("Ignoring failed delete");
    }

    super.close();

    closed = true;
  }

}
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.s3;
|
||||
|
||||
/**
|
||||
* Thrown when Hadoop cannot read the version of the data stored
|
||||
* in {@link S3FileSystem}.
|
||||
*/
|
||||
public class VersionMismatchException extends S3FileSystemException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public VersionMismatchException(String clientVersion, String dataVersion) {
|
||||
super("Version mismatch: client expects version " + clientVersion +
|
||||
", but data has version " +
|
||||
(dataVersion == null ? "[unversioned]" : dataVersion));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
<html>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<body>
|
||||
|
||||
<p>A distributed, block-based implementation of {@link
|
||||
org.apache.hadoop.fs.FileSystem} that uses <a href="http://aws.amazon.com/s3">Amazon S3</a>
|
||||
as a backing store.</p>
|
||||
|
||||
<p>
|
||||
Files are stored in S3 as blocks (represented by
|
||||
{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length.
|
||||
Block metadata is stored in S3 as a small record (represented by
|
||||
{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded
|
||||
path string as a key. Inodes record the file type (regular file or directory) and the list of blocks.
|
||||
This design makes it easy to seek to any given position in a file by reading the inode data to compute
|
||||
which block to access, then using S3's support for
|
||||
<a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.2">HTTP Range</a> headers
|
||||
to start streaming from the correct position.
|
||||
Renames are also efficient: because S3 does not support renames natively, only the inode is moved,
via a DELETE followed by a PUT.
|
||||
</p>
|
||||
<p>
|
||||
For a single file <i>/dir1/file1</i> which takes two blocks of storage, the file structure in S3
|
||||
would be something like this:
|
||||
</p>
|
||||
<pre>
|
||||
/
|
||||
/dir1
|
||||
/dir1/file1
|
||||
block-6415776850131549260
|
||||
block-3026438247347758425
|
||||
</pre>
|
||||
<p>
|
||||
Inodes start with a leading <code>/</code>, while blocks are prefixed with <code>block-</code>.
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3native;
|
||||
|
||||
/**
 * <p>
 * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}:
 * its key, its size in bytes, and its last-modification time.
 * </p>
 */
class FileMetadata {
  private final String key;
  private final long length;
  private final long lastModified;

  public FileMetadata(String key, long length, long lastModified) {
    this.key = key;
    this.length = length;
    this.lastModified = lastModified;
  }

  /** Returns the store key identifying this file. */
  public String getKey() {
    return key;
  }

  /** Returns the file length in bytes. */
  public long getLength() {
    return length;
  }

  /** Returns the last-modification time in milliseconds since the epoch. */
  public long getLastModified() {
    return lastModified;
  }

  @Override
  public String toString() {
    return String.format("FileMetadata[%s, %d, %d]", key, length, lastModified);
  }
}
|
|
@ -0,0 +1,255 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3native;
|
||||
|
||||
import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.s3.S3Credentials;
|
||||
import org.apache.hadoop.fs.s3.S3Exception;
|
||||
import org.jets3t.service.S3ObjectsChunk;
|
||||
import org.jets3t.service.S3Service;
|
||||
import org.jets3t.service.S3ServiceException;
|
||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||
import org.jets3t.service.model.S3Bucket;
|
||||
import org.jets3t.service.model.S3Object;
|
||||
import org.jets3t.service.security.AWSCredentials;
|
||||
|
||||
class Jets3tNativeFileSystemStore implements NativeFileSystemStore {
|
||||
|
||||
private S3Service s3Service;
|
||||
private S3Bucket bucket;
|
||||
|
||||
public void initialize(URI uri, Configuration conf) throws IOException {
|
||||
S3Credentials s3Credentials = new S3Credentials();
|
||||
s3Credentials.initialize(uri, conf);
|
||||
try {
|
||||
AWSCredentials awsCredentials =
|
||||
new AWSCredentials(s3Credentials.getAccessKey(),
|
||||
s3Credentials.getSecretAccessKey());
|
||||
this.s3Service = new RestS3Service(awsCredentials);
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
bucket = new S3Bucket(uri.getHost());
|
||||
}
|
||||
|
||||
public void storeFile(String key, File file, byte[] md5Hash)
|
||||
throws IOException {
|
||||
|
||||
BufferedInputStream in = null;
|
||||
try {
|
||||
in = new BufferedInputStream(new FileInputStream(file));
|
||||
S3Object object = new S3Object(key);
|
||||
object.setDataInputStream(in);
|
||||
object.setContentType("binary/octet-stream");
|
||||
object.setContentLength(file.length());
|
||||
if (md5Hash != null) {
|
||||
object.setMd5Hash(md5Hash);
|
||||
}
|
||||
s3Service.putObject(bucket, object);
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
} finally {
|
||||
if (in != null) {
|
||||
try {
|
||||
in.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void storeEmptyFile(String key) throws IOException {
|
||||
try {
|
||||
S3Object object = new S3Object(key);
|
||||
object.setDataInputStream(new ByteArrayInputStream(new byte[0]));
|
||||
object.setContentType("binary/octet-stream");
|
||||
object.setContentLength(0);
|
||||
s3Service.putObject(bucket, object);
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public FileMetadata retrieveMetadata(String key) throws IOException {
|
||||
try {
|
||||
S3Object object = s3Service.getObjectDetails(bucket, key);
|
||||
return new FileMetadata(key, object.getContentLength(),
|
||||
object.getLastModifiedDate().getTime());
|
||||
} catch (S3ServiceException e) {
|
||||
// Following is brittle. Is there a better way?
|
||||
if (e.getMessage().contains("ResponseCode=404")) {
|
||||
return null;
|
||||
}
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public InputStream retrieve(String key) throws IOException {
|
||||
try {
|
||||
S3Object object = s3Service.getObject(bucket, key);
|
||||
return object.getDataInputStream();
|
||||
} catch (S3ServiceException e) {
|
||||
if ("NoSuchKey".equals(e.getS3ErrorCode())) {
|
||||
return null;
|
||||
}
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public InputStream retrieve(String key, long byteRangeStart)
|
||||
throws IOException {
|
||||
try {
|
||||
S3Object object = s3Service.getObject(bucket, key, null, null, null,
|
||||
null, byteRangeStart, null);
|
||||
return object.getDataInputStream();
|
||||
} catch (S3ServiceException e) {
|
||||
if ("NoSuchKey".equals(e.getS3ErrorCode())) {
|
||||
return null;
|
||||
}
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public PartialListing list(String prefix, int maxListingLength)
|
||||
throws IOException {
|
||||
return list(prefix, maxListingLength, null);
|
||||
}
|
||||
|
||||
public PartialListing list(String prefix, int maxListingLength,
|
||||
String priorLastKey) throws IOException {
|
||||
|
||||
return list(prefix, PATH_DELIMITER, maxListingLength, priorLastKey);
|
||||
}
|
||||
|
||||
public PartialListing listAll(String prefix, int maxListingLength,
|
||||
String priorLastKey) throws IOException {
|
||||
|
||||
return list(prefix, null, maxListingLength, priorLastKey);
|
||||
}
|
||||
|
||||
private PartialListing list(String prefix, String delimiter,
|
||||
int maxListingLength, String priorLastKey) throws IOException {
|
||||
try {
|
||||
if (prefix.length() > 0 && !prefix.endsWith(PATH_DELIMITER)) {
|
||||
prefix += PATH_DELIMITER;
|
||||
}
|
||||
S3ObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(),
|
||||
prefix, delimiter, maxListingLength, priorLastKey);
|
||||
|
||||
FileMetadata[] fileMetadata =
|
||||
new FileMetadata[chunk.getObjects().length];
|
||||
for (int i = 0; i < fileMetadata.length; i++) {
|
||||
S3Object object = chunk.getObjects()[i];
|
||||
fileMetadata[i] = new FileMetadata(object.getKey(),
|
||||
object.getContentLength(), object.getLastModifiedDate().getTime());
|
||||
}
|
||||
return new PartialListing(chunk.getPriorLastKey(), fileMetadata,
|
||||
chunk.getCommonPrefixes());
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void delete(String key) throws IOException {
|
||||
try {
|
||||
s3Service.deleteObject(bucket, key);
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void rename(String srcKey, String dstKey) throws IOException {
|
||||
try {
|
||||
s3Service.moveObject(bucket.getName(), srcKey, bucket.getName(),
|
||||
new S3Object(dstKey), false);
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void purge(String prefix) throws IOException {
|
||||
try {
|
||||
S3Object[] objects = s3Service.listObjects(bucket, prefix, null);
|
||||
for (int i = 0; i < objects.length; i++) {
|
||||
s3Service.deleteObject(bucket, objects[i].getKey());
|
||||
}
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void dump() throws IOException {
|
||||
StringBuilder sb = new StringBuilder("S3 Native Filesystem, ");
|
||||
sb.append(bucket.getName()).append("\n");
|
||||
try {
|
||||
S3Object[] objects = s3Service.listObjects(bucket);
|
||||
for (int i = 0; i < objects.length; i++) {
|
||||
sb.append(objects[i].getKey()).append("\n");
|
||||
}
|
||||
} catch (S3ServiceException e) {
|
||||
if (e.getCause() instanceof IOException) {
|
||||
throw (IOException) e.getCause();
|
||||
}
|
||||
throw new S3Exception(e);
|
||||
}
|
||||
System.out.println(sb);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3native;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
 * <p>
 * An abstraction for a key-based {@link File} store.
 * </p>
 */
interface NativeFileSystemStore {

  /** Sets up the store for the given filesystem URI and configuration. */
  void initialize(URI uri, Configuration conf) throws IOException;

  /** Stores the contents of a local file under key; md5Hash may be null. */
  void storeFile(String key, File file, byte[] md5Hash) throws IOException;
  /** Stores a zero-length object under key (used as a directory marker). */
  void storeEmptyFile(String key) throws IOException;

  /** Returns metadata for key, or null if the key does not exist. */
  FileMetadata retrieveMetadata(String key) throws IOException;
  /** Opens the whole object; returns null if the key does not exist. */
  InputStream retrieve(String key) throws IOException;
  /** Opens the object starting at the given offset; null if absent. */
  InputStream retrieve(String key, long byteRangeStart) throws IOException;

  /** Single-level listing under prefix, first chunk. */
  PartialListing list(String prefix, int maxListingLength) throws IOException;
  /** Single-level listing under prefix, resuming after priorLastKey. */
  PartialListing list(String prefix, int maxListingLength, String priorLastKey)
    throws IOException;
  /** Recursive listing under prefix, resuming after priorLastKey. */
  PartialListing listAll(String prefix, int maxListingLength,
      String priorLastKey) throws IOException;

  /** Deletes the object stored under key. */
  void delete(String key) throws IOException;

  /** Moves the object at srcKey to dstKey. */
  void rename(String srcKey, String dstKey) throws IOException;

  /**
   * Delete all keys with the given prefix. Used for testing.
   * @throws IOException
   */
  void purge(String prefix) throws IOException;

  /**
   * Diagnostic method to dump state to the console.
   * @throws IOException
   */
  void dump() throws IOException;
}
|
|
@ -0,0 +1,578 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3native;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.security.DigestOutputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.BufferedFSInputStream;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FSInputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.fs.s3.S3Exception;
|
||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||
import org.apache.hadoop.io.retry.RetryProxy;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A {@link FileSystem} for reading and writing files stored on
|
||||
* <a href="http://aws.amazon.com/s3">Amazon S3</a>.
|
||||
* Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation
|
||||
* stores files on S3 in their
|
||||
* native form so they can be read by other S3 tools.
|
||||
* </p>
|
||||
* @see org.apache.hadoop.fs.s3.S3FileSystem
|
||||
*/
|
||||
public class NativeS3FileSystem extends FileSystem {
|
||||
|
||||
public static final Log LOG =
  LogFactory.getLog(NativeS3FileSystem.class);

// Suffix appended to a key to mark an (otherwise empty) directory in S3.
private static final String FOLDER_SUFFIX = "_$folder$";
// S3's per-object size limit; also reported as the block size in FileStatus.
private static final long MAX_S3_FILE_SIZE = 5 * 1024 * 1024 * 1024L;
static final String PATH_DELIMITER = Path.SEPARATOR;
// Maximum number of keys S3 returns per listing request.
private static final int S3_MAX_LISTING_LENGTH = 1000;
|
||||
|
||||
/**
 * Read side: wraps the raw S3 object stream and tracks the read position.
 * seek() is implemented by closing the stream and re-opening the object
 * with a byte-range request starting at the target offset.
 */
private class NativeS3FsInputStream extends FSInputStream {

  private InputStream in;
  private final String key;
  private long pos = 0;

  public NativeS3FsInputStream(InputStream in, String key) {
    this.in = in;
    this.key = key;
  }

  public synchronized int read() throws IOException {
    int result = in.read();
    if (result != -1) {
      pos++;
    }
    // statistics comes from the enclosing FileSystem; may be null.
    if (statistics != null && result != -1) {
      statistics.incrementBytesRead(1);
    }
    return result;
  }
  public synchronized int read(byte[] b, int off, int len)
    throws IOException {

    int result = in.read(b, off, len);
    if (result > 0) {
      pos += result;
    }
    if (statistics != null && result > 0) {
      statistics.incrementBytesRead(result);
    }
    return result;
  }

  public void close() throws IOException {
    in.close();
  }

  public synchronized void seek(long pos) throws IOException {
    // Re-open the object at the requested offset rather than skipping.
    in.close();
    in = store.retrieve(key, pos);
    this.pos = pos;
  }
  public synchronized long getPos() throws IOException {
    return pos;
  }
  // Only one "replica" exists in S3, so there is no alternate source.
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }
}
|
||||
|
||||
/**
 * Write side: buffers all output to a local temporary file (under
 * fs.s3.buffer.dir) while computing an MD5 digest, then uploads the whole
 * file to S3 in a single putObject call when the stream is closed.
 */
private class NativeS3FsOutputStream extends OutputStream {

  private Configuration conf;
  private String key;
  private File backupFile;
  private OutputStream backupStream;
  private MessageDigest digest;
  private boolean closed;

  // NOTE(review): the store, progress and bufferSize parameters are not
  // used here; close() uploads via the enclosing filesystem's store field.
  public NativeS3FsOutputStream(Configuration conf,
      NativeFileSystemStore store, String key, Progressable progress,
      int bufferSize) throws IOException {
    this.conf = conf;
    this.key = key;
    this.backupFile = newBackupFile();
    try {
      this.digest = MessageDigest.getInstance("MD5");
      this.backupStream = new BufferedOutputStream(new DigestOutputStream(
          new FileOutputStream(backupFile), this.digest));
    } catch (NoSuchAlgorithmException e) {
      // Fall back to plain buffering; digest stays null and close() skips
      // the integrity check.
      LOG.warn("Cannot load MD5 digest algorithm," +
          "skipping message integrity check.", e);
      this.backupStream = new BufferedOutputStream(
          new FileOutputStream(backupFile));
    }
  }

  // Creates the local staging file, creating the buffer directory if needed.
  private File newBackupFile() throws IOException {
    File dir = new File(conf.get("fs.s3.buffer.dir"));
    if (!dir.mkdirs() && !dir.exists()) {
      throw new IOException("Cannot create S3 buffer directory: " + dir);
    }
    File result = File.createTempFile("output-", ".tmp", dir);
    result.deleteOnExit();
    return result;
  }

  @Override
  public void flush() throws IOException {
    // Only flushes to the local staging file; nothing reaches S3 until
    // close().
    backupStream.flush();
  }

  @Override
  public synchronized void close() throws IOException {
    // Idempotent: a second close() is a no-op.
    if (closed) {
      return;
    }

    backupStream.close();

    try {
      // Upload the staged file in one shot, with the MD5 if available.
      byte[] md5Hash = digest == null ? null : digest.digest();
      store.storeFile(key, backupFile, md5Hash);
    } finally {
      // Always clean up the staging file and mark the stream closed, even
      // if the upload failed.
      if (!backupFile.delete()) {
        LOG.warn("Could not delete temporary s3n file: " + backupFile);
      }
      super.close();
      closed = true;
    }

  }

  @Override
  public void write(int b) throws IOException {
    backupStream.write(b);
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    backupStream.write(b, off, len);
  }


}
|
||||
|
||||
// Filesystem URI (scheme + authority), set in initialize().
private URI uri;
// Backing key/value store; either injected or built in initialize().
private NativeFileSystemStore store;
private Path workingDir;

public NativeS3FileSystem() {
  // set store in initialize()
}

// Visible for testing: allows injecting a mock/alternate store.
public NativeS3FileSystem(NativeFileSystemStore store) {
  this.store = store;
}
|
||||
|
||||
@Override
public void initialize(URI uri, Configuration conf) throws IOException {
  super.initialize(uri, conf);
  // Keep an injected store (testing ctor); otherwise build the default
  // retry-wrapped JetS3t store.
  if (store == null) {
    store = createDefaultStore(conf);
  }
  store.initialize(uri, conf);
  setConf(conf);
  this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
  this.workingDir =
    new Path("/user", System.getProperty("user.name")).makeQualified(this);
}
|
||||
|
||||
/**
 * Builds the default store: a Jets3tNativeFileSystemStore wrapped in a
 * retry proxy so that transient IOExceptions / S3Exceptions from storeFile
 * are retried (fs.s3.maxRetries attempts, fs.s3.sleepTimeSeconds apart).
 */
private static NativeFileSystemStore createDefaultStore(Configuration conf) {
  NativeFileSystemStore store = new Jets3tNativeFileSystemStore();

  RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
      conf.getInt("fs.s3.maxRetries", 4),
      conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS);
  Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
    new HashMap<Class<? extends Exception>, RetryPolicy>();
  exceptionToPolicyMap.put(IOException.class, basePolicy);
  exceptionToPolicyMap.put(S3Exception.class, basePolicy);

  RetryPolicy methodPolicy = RetryPolicies.retryByException(
      RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
  // Only storeFile gets the retrying policy by name; other methods use the
  // proxy's default behavior.
  Map<String, RetryPolicy> methodNameToPolicyMap =
    new HashMap<String, RetryPolicy>();
  methodNameToPolicyMap.put("storeFile", methodPolicy);

  return (NativeFileSystemStore)
    RetryProxy.create(NativeFileSystemStore.class, store,
        methodNameToPolicyMap);
}
|
||||
|
||||
private static String pathToKey(Path path) {
|
||||
if (!path.isAbsolute()) {
|
||||
throw new IllegalArgumentException("Path must be absolute: " + path);
|
||||
}
|
||||
return path.toUri().getPath().substring(1); // remove initial slash
|
||||
}
|
||||
|
||||
private static Path keyToPath(String key) {
|
||||
return new Path("/" + key);
|
||||
}
|
||||
|
||||
private Path makeAbsolute(Path path) {
|
||||
if (path.isAbsolute()) {
|
||||
return path;
|
||||
}
|
||||
return new Path(workingDir, path);
|
||||
}
|
||||
|
||||
/** This optional operation is not yet supported. */
public FSDataOutputStream append(Path f, int bufferSize,
    Progressable progress) throws IOException {
  // S3 objects are immutable, so append cannot be implemented here.
  throw new IOException("Not supported");
}
|
||||
|
||||
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  // NOTE(review): exists-then-create is not atomic; a concurrent writer
  // can slip in between the check and the upload.
  if (exists(f) && !overwrite) {
    throw new IOException("File already exists:"+f);
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  // permission, replication and blockSize are accepted for interface
  // compatibility but are not used by the S3-native output stream.
  return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store,
      key, progress, bufferSize), statistics);
}
|
||||
|
||||
@Override
public boolean delete(Path f, boolean recursive) throws IOException {
  FileStatus status;
  try {
    status = getFileStatus(f);
  } catch (FileNotFoundException e) {
    // Nothing to delete.
    return false;
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  if (status.isDir()) {
    FileStatus[] contents = listStatus(f);
    if (!recursive && contents.length > 0) {
      throw new IOException("Directory " + f.toString() + " is not empty.");
    }
    // Depth-first delete of every child, then remove the folder marker.
    for (FileStatus p : contents) {
      if (!delete(p.getPath(), recursive)) {
        return false;
      }
    }
    store.delete(key + FOLDER_SUFFIX);
  } else {
    store.delete(key);
  }
  return true;
}
|
||||
|
||||
@Override
public FileStatus getFileStatus(Path f) throws IOException {

  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);

  if (key.length() == 0) { // root always exists
    return newDirectory(absolutePath);
  }

  // A plain object under this key means it is a file.
  FileMetadata meta = store.retrieveMetadata(key);
  if (meta != null) {
    return newFile(meta, absolutePath);
  }
  // A folder-marker object means an (empty) directory.
  if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) {
    return newDirectory(absolutePath);
  }

  // Any key or common prefix below this one implies a directory.
  PartialListing listing = store.list(key, 1);
  if (listing.getFiles().length > 0 ||
      listing.getCommonPrefixes().length > 0) {
    return newDirectory(absolutePath);
  }

  throw new FileNotFoundException(absolutePath +
      ": No such file or directory.");

}
|
||||
|
||||
/** Returns this filesystem's URI (scheme + authority), set in initialize(). */
@Override
public URI getUri() {
  return uri;
}
|
||||
|
||||
/**
 * <p>
 * If <code>f</code> is a file, this method will make a single call to S3.
 * If <code>f</code> is a directory, this method will make a maximum of
 * (<i>n</i> / 1000) + 2 calls to S3, where <i>n</i> is the total number of
 * files and directories contained directly in <code>f</code>.
 * </p>
 */
@Override
public FileStatus[] listStatus(Path f) throws IOException {

  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);

  // A plain object at this key is a file: listing it returns itself.
  if (key.length() > 0) {
    FileMetadata meta = store.retrieveMetadata(key);
    if (meta != null) {
      return new FileStatus[] { newFile(meta, absolutePath) };
    }
  }

  URI pathUri = absolutePath.toUri();
  Set<FileStatus> status = new TreeSet<FileStatus>();
  String priorLastKey = null;
  // Page through the listing 1000 keys at a time until exhausted.
  do {
    PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH,
        priorLastKey);
    for (FileMetadata fileMetadata : listing.getFiles()) {
      Path subpath = keyToPath(fileMetadata.getKey());
      String relativePath = pathUri.relativize(subpath.toUri()).getPath();
      if (relativePath.endsWith(FOLDER_SUFFIX)) {
        // Folder marker object: report it as the directory it stands for.
        status.add(newDirectory(new Path(absolutePath,
            relativePath.substring(0,
                relativePath.indexOf(FOLDER_SUFFIX)))));
      } else {
        status.add(newFile(fileMetadata, subpath));
      }
    }
    // Common prefixes are subdirectories implied by deeper keys.
    for (String commonPrefix : listing.getCommonPrefixes()) {
      Path subpath = keyToPath(commonPrefix);
      String relativePath = pathUri.relativize(subpath.toUri()).getPath();
      status.add(newDirectory(new Path(absolutePath, relativePath)));
    }
    priorLastKey = listing.getPriorLastKey();
  } while (priorLastKey != null);

  // Nothing listed and no folder marker: the path does not exist.
  // NOTE(review): returns null here rather than throwing
  // FileNotFoundException — callers must handle a null result.
  if (status.isEmpty() &&
      store.retrieveMetadata(key + FOLDER_SUFFIX) == null) {
    return null;
  }

  return status.toArray(new FileStatus[0]);
}
|
||||
|
||||
// Builds a file FileStatus; replication is fixed at 1 and the "block size"
// reported is MAX_S3_FILE_SIZE since S3-native files are single objects.
private FileStatus newFile(FileMetadata meta, Path path) {
  return new FileStatus(meta.getLength(), false, 1, MAX_S3_FILE_SIZE,
      meta.getLastModified(), path.makeQualified(this));
}
|
||||
|
||||
// Builds a directory FileStatus; directories have zero length and no
// meaningful modification time (0).
private FileStatus newDirectory(Path path) {
  return new FileStatus(0, true, 1, MAX_S3_FILE_SIZE, 0,
      path.makeQualified(this));
}
|
||||
|
||||
@Override
public boolean mkdirs(Path f, FsPermission permission) throws IOException {
  // permission is accepted for interface compatibility but not used.
  Path absolutePath = makeAbsolute(f);
  // Collect the path and all its ancestors, root first.
  List<Path> paths = new ArrayList<Path>();
  do {
    paths.add(0, absolutePath);
    absolutePath = absolutePath.getParent();
  } while (absolutePath != null);

  // Create each level in turn; result is true only if every mkdir succeeds.
  boolean result = true;
  for (Path path : paths) {
    result &= mkdir(path);
  }
  return result;
}
|
||||
|
||||
/**
 * Creates a single directory level: a no-op if it already exists as a
 * directory, an error if the path is an existing file, otherwise stores an
 * empty folder-marker object.
 */
private boolean mkdir(Path f) throws IOException {
  try {
    FileStatus fileStatus = getFileStatus(f);
    if (!fileStatus.isDir()) {
      throw new IOException(String.format(
          "Can't make directory for path %s since it is a file.", f));

    }
  } catch (FileNotFoundException e) {
    // Path does not exist yet: create the folder marker.
    String key = pathToKey(f) + FOLDER_SUFFIX;
    store.storeEmptyFile(key);
  }
  return true;
}
|
||||
|
||||
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
  if (!exists(f)) {
    throw new FileNotFoundException(f.toString());
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  // Wrap the raw S3 stream so reads are buffered and seek is supported.
  return new FSDataInputStream(new BufferedFSInputStream(
      new NativeS3FsInputStream(store.retrieve(key), key), bufferSize));
}
|
||||
|
||||
// rename() and delete() use this method to ensure that the parent directory
// of the source does not vanish: if the parent's last child is moved or
// removed, an explicit folder marker object keeps the directory visible.
private void createParent(Path path) throws IOException {
  Path parent = path.getParent();
  if (parent != null) {
    String key = pathToKey(makeAbsolute(parent));
    // An empty key denotes the bucket root, which needs no marker.
    if (key.length() > 0) {
      store.storeEmptyFile(key + FOLDER_SUFFIX);
    }
  }
}
|
||||
|
||||
/**
 * Classifies a path into one of three outcomes:
 * <ul>
 *   <li>returns true  — the path exists and is a file (a plain S3 object
 *       lives at the key);</li>
 *   <li>returns false — the path exists and is a directory (root, a folder
 *       marker object, or a non-empty key prefix);</li>
 *   <li>throws FileNotFoundException — nothing exists at the path.</li>
 * </ul>
 * Callers (e.g. rename) rely on the exception as the "absent" signal.
 */
private boolean existsAndIsFile(Path f) throws IOException {

  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);

  if (key.length() == 0) {
    // Root of the file system: always an existing directory.
    return false;
  }

  FileMetadata meta = store.retrieveMetadata(key);
  if (meta != null) {
    // S3 object with given key exists, so this is a file
    return true;
  }

  if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) {
    // Folder marker object present: signifies an (empty) directory.
    return false;
  }

  // No object and no marker — probe for any children under the prefix.
  PartialListing listing = store.list(key, 1, null);
  if (listing.getFiles().length > 0 ||
      listing.getCommonPrefixes().length > 0) {
    // Non-empty directory
    return false;
  }

  throw new FileNotFoundException(absolutePath +
      ": No such file or directory");
}
|
||||
|
||||
|
||||
/**
 * Renames src to dst following HDFS rename semantics:
 * <ul>
 *   <li>renaming the root, overwriting an existing file, or renaming into
 *       a missing/non-directory parent returns false rather than throwing;</li>
 *   <li>if dst exists as a directory, src is moved <em>into</em> it;</li>
 *   <li>directories are moved by copying the folder marker and renaming
 *       every contained object, one listing chunk at a time.</li>
 * </ul>
 * NOTE(review): the per-object renames are not atomic — a failure midway
 * leaves the tree partially moved.
 *
 * @return true on success, false on the precondition failures above
 */
@Override
public boolean rename(Path src, Path dst) throws IOException {

  String srcKey = pathToKey(makeAbsolute(src));

  if (srcKey.length() == 0) {
    // Cannot rename root of file system
    return false;
  }

  // Figure out the final destination
  String dstKey;
  try {
    boolean dstIsFile = existsAndIsFile(dst);
    if (dstIsFile) {
      // Attempting to overwrite a file using rename()
      return false;
    } else {
      // dst is an existing directory: move src to within it, keeping
      // src's base name.
      dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName())));
    }
  } catch (FileNotFoundException e) {
    // dst doesn't exist, so we can proceed using dst itself as the target.
    dstKey = pathToKey(makeAbsolute(dst));
    try {
      if (!getFileStatus(dst.getParent()).isDir()) {
        return false; // parent dst is a file
      }
    } catch (FileNotFoundException ex) {
      return false; // parent dst does not exist
    }
  }

  try {
    boolean srcIsFile = existsAndIsFile(src);
    if (srcIsFile) {
      // Simple case: a single object to rename.
      store.rename(srcKey, dstKey);
    } else {
      // Move the folder marker object first.
      store.delete(srcKey + FOLDER_SUFFIX);
      store.storeEmptyFile(dstKey + FOLDER_SUFFIX);

      // Move everything inside the folder, chunk by chunk; the prior-last
      // key drives pagination of the S3 listing.
      String priorLastKey = null;
      do {
        PartialListing listing = store.listAll(srcKey, S3_MAX_LISTING_LENGTH,
            priorLastKey);
        for (FileMetadata file : listing.getFiles()) {
          // Re-root each child key from srcKey onto dstKey.
          store.rename(file.getKey(), dstKey
              + file.getKey().substring(srcKey.length()));
        }
        priorLastKey = listing.getPriorLastKey();
      } while (priorLastKey != null);
    }

    // Keep src's parent directory visible even if src was its only child.
    createParent(src);
    return true;

  } catch (FileNotFoundException e) {
    // Source file does not exist;
    return false;
  }
}
|
||||
|
||||
|
||||
/**
 * Set the working directory to the given directory.
 * NOTE(review): the argument is stored as-is — it is neither validated nor
 * made absolute here; relative paths are resolved later by makeAbsolute.
 */
@Override
public void setWorkingDirectory(Path newDir) {
  workingDir = newDir;
}
|
||||
|
||||
/** Returns the current working directory previously set (or the default). */
@Override
public Path getWorkingDirectory() {
  return workingDir;
}
|
||||
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.s3native;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Holds information on a directory listing for a
|
||||
* {@link NativeFileSystemStore}.
|
||||
* This includes the {@link FileMetadata files} and directories
|
||||
* (their names) contained in a directory.
|
||||
* </p>
|
||||
* <p>
|
||||
* This listing may be returned in chunks, so a <code>priorLastKey</code>
|
||||
* is provided so that the next chunk may be requested.
|
||||
* </p>
|
||||
* @see NativeFileSystemStore#list(String, int, String)
|
||||
*/
|
||||
class PartialListing {
|
||||
|
||||
private final String priorLastKey;
|
||||
private final FileMetadata[] files;
|
||||
private final String[] commonPrefixes;
|
||||
|
||||
public PartialListing(String priorLastKey, FileMetadata[] files,
|
||||
String[] commonPrefixes) {
|
||||
this.priorLastKey = priorLastKey;
|
||||
this.files = files;
|
||||
this.commonPrefixes = commonPrefixes;
|
||||
}
|
||||
|
||||
public FileMetadata[] getFiles() {
|
||||
return files;
|
||||
}
|
||||
|
||||
public String[] getCommonPrefixes() {
|
||||
return commonPrefixes;
|
||||
}
|
||||
|
||||
public String getPriorLastKey() {
|
||||
return priorLastKey;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
<html>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<body>
|
||||
|
||||
<p>
|
||||
A distributed implementation of {@link
|
||||
org.apache.hadoop.fs.FileSystem} for reading and writing files on
|
||||
<a href="http://aws.amazon.com/s3">Amazon S3</a>.
|
||||
Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based,
|
||||
this implementation stores
|
||||
files on S3 in their native form for interoperability with other S3 tools.
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,86 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
|
||||
/**
|
||||
* An abstract class for the execution of a file system command
|
||||
*/
|
||||
abstract public class Command extends Configured {
|
||||
protected String[] args;
|
||||
|
||||
/** Constructor */
|
||||
protected Command(Configuration conf) {
|
||||
super(conf);
|
||||
}
|
||||
|
||||
/** Return the command's name excluding the leading character - */
|
||||
abstract public String getCommandName();
|
||||
|
||||
/**
|
||||
* Execute the command on the input path
|
||||
*
|
||||
* @param path the input path
|
||||
* @throws IOException if any error occurs
|
||||
*/
|
||||
abstract protected void run(Path path) throws IOException;
|
||||
|
||||
/**
|
||||
* For each source path, execute the command
|
||||
*
|
||||
* @return 0 if it runs successfully; -1 if it fails
|
||||
*/
|
||||
public int runAll() {
|
||||
int exitCode = 0;
|
||||
for (String src : args) {
|
||||
try {
|
||||
Path srcPath = new Path(src);
|
||||
FileSystem fs = srcPath.getFileSystem(getConf());
|
||||
FileStatus[] statuses = fs.globStatus(srcPath);
|
||||
if (statuses == null) {
|
||||
System.err.println("Can not find listing for " + src);
|
||||
exitCode = -1;
|
||||
} else {
|
||||
for(FileStatus s : statuses) {
|
||||
run(s.getPath());
|
||||
}
|
||||
}
|
||||
} catch (RemoteException re) {
|
||||
exitCode = -1;
|
||||
String content = re.getLocalizedMessage();
|
||||
int eol = content.indexOf('\n');
|
||||
if (eol>=0) {
|
||||
content = content.substring(0, eol);
|
||||
}
|
||||
System.err.println(getCommandName() + ": " + content);
|
||||
} catch (IOException e) {
|
||||
exitCode = -1;
|
||||
System.err.println(getCommandName() + ": " + e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
return exitCode;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Parse the args of a command and check the format of args.
|
||||
*/
|
||||
/**
 * Parse the args of a command and check the format of args.
 *
 * An argument beginning with '-' (and longer than one character) is treated
 * as an option; everything else is a positional parameter. The number of
 * parameters must fall within [minPar, maxPar].
 */
public class CommandFormat {
  final String name;                // command name, kept for error reporting
  final int minPar, maxPar;         // inclusive bounds on the parameter count
  final Map<String, Boolean> options = new HashMap<String, Boolean>();

  /**
   * Constructor.
   *
   * @param n command name
   * @param min minimum number of positional parameters
   * @param max maximum number of positional parameters
   * @param possibleOpt the option names this command accepts (without '-')
   */
  public CommandFormat(String n, int min, int max, String... possibleOpt) {
    name = n;
    minPar = min;
    maxPar = max;
    for (String opt : possibleOpt) {
      options.put(opt, Boolean.FALSE);
    }
  }

  /** Parse parameters starting from the given position
   *
   * @param args an array of input arguments
   * @param pos the position at which starts to parse
   * @return a list of parameters
   * @throws IllegalArgumentException if an unknown option is seen or the
   *         parameter count is out of range
   */
  public List<String> parse(String[] args, int pos) {
    List<String> parameters = new ArrayList<String>();
    for (; pos < args.length; pos++) {
      // Check length before charAt(0): an empty-string argument must be
      // treated as a parameter, not crash with StringIndexOutOfBounds.
      if (args[pos].length() > 1 && args[pos].charAt(0) == '-') {
        String opt = args[pos].substring(1);
        if (options.containsKey(opt)) {
          options.put(opt, Boolean.TRUE);
        } else {
          throw new IllegalArgumentException("Illegal option " + args[pos]);
        }
      } else {
        parameters.add(args[pos]);
      }
    }
    int psize = parameters.size();
    if (psize < minPar || psize > maxPar) {
      throw new IllegalArgumentException("Illegal number of arguments");
    }
    return parameters;
  }

  /** Return if the option is set or not
   *
   * @param option String representation of an option
   * @return true is the option is set; false otherwise
   */
  public boolean getOpt(String option) {
    // Guard against the NullPointerException that auto-unboxing would throw
    // when asked about an option that was never declared.
    Boolean set = options.get(option);
    return set != null && set.booleanValue();
  }
}
|
|
@ -0,0 +1,28 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
/** Static helpers shared by the shell commands. */
final class CommandUtils {

  /** Utility class — not meant to be instantiated. */
  private CommandUtils() {
  }

  /**
   * Formats a help entry as "usage: first-line" followed by the remaining
   * description lines, each on its own tab-indented line.
   *
   * @param usage the command's usage string
   * @param descriptions one or more description lines; must be non-empty
   * @return the formatted description block
   */
  static String formatDescription(String usage, String... descriptions) {
    StringBuilder b =
        new StringBuilder(usage).append(": ").append(descriptions[0]);
    for (int i = 1; i < descriptions.length; i++) {
      b.append("\n\t\t").append(descriptions[i]);
    }
    return b.toString();
  }
}
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.fs.shell;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Count the number of directories, files, bytes, quota, and remaining quota.
|
||||
*/
|
||||
public class Count extends Command {
|
||||
public static final String NAME = "count";
|
||||
public static final String USAGE = "-" + NAME + "[-q] <path>";
|
||||
public static final String DESCRIPTION = CommandUtils.formatDescription(USAGE,
|
||||
"Count the number of directories, files and bytes under the paths",
|
||||
"that match the specified file pattern. The output columns are:",
|
||||
"DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME or",
|
||||
"QUOTA REMAINING_QUATA SPACE_QUOTA REMAINING_SPACE_QUOTA ",
|
||||
" DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME");
|
||||
|
||||
private boolean qOption;
|
||||
|
||||
/** Constructor
|
||||
*
|
||||
* @param cmd the count command
|
||||
* @param pos the starting index of the arguments
|
||||
*/
|
||||
public Count(String[] cmd, int pos, Configuration conf) {
|
||||
super(conf);
|
||||
CommandFormat c = new CommandFormat(NAME, 1, Integer.MAX_VALUE, "q");
|
||||
List<String> parameters = c.parse(cmd, pos);
|
||||
this.args = parameters.toArray(new String[parameters.size()]);
|
||||
if (this.args.length == 0) { // default path is the current working directory
|
||||
this.args = new String[] {"."};
|
||||
}
|
||||
this.qOption = c.getOpt("q") ? true: false;
|
||||
}
|
||||
|
||||
/** Check if a command is the count command
|
||||
*
|
||||
* @param cmd A string representation of a command starting with "-"
|
||||
* @return true if this is a count command; false otherwise
|
||||
*/
|
||||
public static boolean matches(String cmd) {
|
||||
return ("-" + NAME).equals(cmd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCommandName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void run(Path path) throws IOException {
|
||||
FileSystem fs = path.getFileSystem(getConf());
|
||||
System.out.println(fs.getContentSummary(path).toString(qOption) + path);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A container class for javax.servlet.Filter.
|
||||
*/
|
||||
public interface FilterContainer {
  /**
   * Add a filter to the container. The filter is applied to the
   * user-facing pages served by the container.
   * @param name Filter name
   * @param classname Filter class name
   * @param parameters a map from parameter names to initial values
   */
  void addFilter(String name, String classname, Map<String, String> parameters);

  /**
   * Add a global filter to the container. A global filter is applied to
   * every URL the container serves, not just the user-facing ones.
   * @param name filter name
   * @param classname filter class name
   * @param parameters a map from parameter names to initial values
   */
  void addGlobalFilter(String name, String classname, Map<String, String> parameters);
}
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
/**
|
||||
* Initialize a javax.servlet.Filter.
|
||||
*/
|
||||
public abstract class FilterInitializer {
  /**
   * Initialize a Filter to a FilterContainer.
   * Implementations register their filter(s) on the container; called once
   * per configured initializer while the HTTP server is being constructed.
   * Note: package-private on purpose — only this package invokes it.
   * @param container The filter container
   */
  abstract void initFilter(FilterContainer container);
}
|
|
@ -0,0 +1,519 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.net.BindException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.nio.channels.ServerSocketChannel;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.log.LogLevel;
|
||||
import org.apache.hadoop.metrics.MetricsServlet;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
|
||||
import org.mortbay.jetty.Connector;
|
||||
import org.mortbay.jetty.Handler;
|
||||
import org.mortbay.jetty.Server;
|
||||
import org.mortbay.jetty.handler.ContextHandlerCollection;
|
||||
import org.mortbay.jetty.nio.SelectChannelConnector;
|
||||
import org.mortbay.jetty.security.SslSocketConnector;
|
||||
import org.mortbay.jetty.servlet.Context;
|
||||
import org.mortbay.jetty.servlet.DefaultServlet;
|
||||
import org.mortbay.jetty.servlet.FilterHolder;
|
||||
import org.mortbay.jetty.servlet.FilterMapping;
|
||||
import org.mortbay.jetty.servlet.ServletHandler;
|
||||
import org.mortbay.jetty.servlet.ServletHolder;
|
||||
import org.mortbay.jetty.webapp.WebAppContext;
|
||||
import org.mortbay.thread.QueuedThreadPool;
|
||||
import org.mortbay.util.MultiException;
|
||||
|
||||
/**
|
||||
* Create a Jetty embedded server to answer http requests. The primary goal
|
||||
* is to serve up status information for the server.
|
||||
* There are three contexts:
|
||||
* "/logs/" -> points to the log directory
|
||||
* "/static/" -> points to common static files (src/webapps/static)
|
||||
* "/" -> the jsp server code from (src/webapps/<name>)
|
||||
*/
|
||||
public class HttpServer implements FilterContainer {
|
||||
public static final Log LOG = LogFactory.getLog(HttpServer.class);
|
||||
|
||||
static final String FILTER_INITIALIZER_PROPERTY
|
||||
= "hadoop.http.filter.initializers";
|
||||
|
||||
protected final Server webServer;
|
||||
protected final Connector listener;
|
||||
protected final WebAppContext webAppContext;
|
||||
protected final boolean findPort;
|
||||
protected final Map<Context, Boolean> defaultContexts =
|
||||
new HashMap<Context, Boolean>();
|
||||
protected final List<String> filterNames = new ArrayList<String>();
|
||||
private static final int MAX_RETRIES = 10;
|
||||
|
||||
/** Same as this(name, bindAddress, port, findPort, new Configuration()). */
public HttpServer(String name, String bindAddress, int port, boolean findPort
    ) throws IOException {
  this(name, bindAddress, port, findPort, new Configuration());
}
|
||||
|
||||
/**
 * Create a status server on the given port.
 * The jsp scripts are taken from src/webapps/&lt;name&gt;.
 * @param name The name of the server
 * @param bindAddress address the listener binds to
 * @param port The port to use on the server
 * @param findPort whether the server should start at the given port and
 *        increment by 1 until it finds a free port.
 * @param conf Configuration, consulted for filter initializers
 */
public HttpServer(String name, String bindAddress, int port,
    boolean findPort, Configuration conf) throws IOException {
  webServer = new Server();
  this.findPort = findPort;

  // Wire up the single listener (host/port) before any handlers.
  listener = createBaseListener(conf);
  listener.setHost(bindAddress);
  listener.setPort(port);
  webServer.addConnector(listener);

  webServer.setThreadPool(new QueuedThreadPool());

  final String appDir = getWebAppsPath();
  ContextHandlerCollection contexts = new ContextHandlerCollection();
  webServer.setHandler(contexts);

  // "/" serves the webapp named after this server.
  webAppContext = new WebAppContext();
  webAppContext.setContextPath("/");
  webAppContext.setWar(appDir + "/" + name);
  webServer.addHandler(webAppContext);

  // "/logs" and "/static" contexts.
  addDefaultApps(contexts, appDir);

  // Run configured filter initializers before registering the default
  // servlets so the filters see every servlet's path mapping.
  final FilterInitializer[] initializers = getFilterInitializers(conf);
  if (initializers != null) {
    for (FilterInitializer c : initializers) {
      c.initFilter(this);
    }
  }
  addDefaultServlets();
}
|
||||
|
||||
/**
 * Create a required listener for the Jetty instance listening on the port
 * provided. This wrapper and all subclasses must create at least one
 * listener.
 * The returned connector is NIO-based with name resolution and direct
 * buffers disabled; subclasses may override (e.g. to return an SSL
 * connector).
 */
protected Connector createBaseListener(Configuration conf)
    throws IOException {
  SelectChannelConnector ret = new SelectChannelConnector();
  ret.setLowResourceMaxIdleTime(10000);
  ret.setAcceptQueueSize(128);
  ret.setResolveNames(false);
  ret.setUseDirectBuffers(false);
  return ret;
}
|
||||
|
||||
/** Get an array of FilterConfiguration specified in the conf */
|
||||
private static FilterInitializer[] getFilterInitializers(Configuration conf) {
|
||||
if (conf == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Class<?>[] classes = conf.getClasses(FILTER_INITIALIZER_PROPERTY);
|
||||
if (classes == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
FilterInitializer[] initializers = new FilterInitializer[classes.length];
|
||||
for(int i = 0; i < classes.length; i++) {
|
||||
initializers[i] = (FilterInitializer)ReflectionUtils.newInstance(
|
||||
classes[i], conf);
|
||||
}
|
||||
return initializers;
|
||||
}
|
||||
|
||||
/**
 * Add default apps.
 * Registers the "/logs" context (only when the "hadoop.log.dir" system
 * property is set) and the "/static" context; both are marked filtered so
 * that addFilter applies to them.
 * @param parent collection the contexts are attached to
 * @param appDir The application directory
 * @throws IOException
 */
protected void addDefaultApps(ContextHandlerCollection parent,
    final String appDir) throws IOException {
  // set up the context for "/logs/" if "hadoop.log.dir" property is defined.
  String logDir = System.getProperty("hadoop.log.dir");
  if (logDir != null) {
    Context logContext = new Context(parent, "/logs");
    logContext.setResourceBase(logDir);
    logContext.addServlet(DefaultServlet.class, "/");
    defaultContexts.put(logContext, true);
  }
  // set up the context for "/static/*"
  Context staticContext = new Context(parent, "/static");
  staticContext.setResourceBase(appDir + "/static");
  staticContext.addServlet(DefaultServlet.class, "/*");
  defaultContexts.put(staticContext, true);
}
|
||||
|
||||
/**
 * Add default servlets: thread stacks, log-level control, and metrics.
 */
protected void addDefaultServlets() {
  // set up default servlets
  addServlet("stacks", "/stacks", StackServlet.class);
  addServlet("logLevel", "/logLevel", LogLevel.Servlet.class);
  addServlet("metrics", "/metrics", MetricsServlet.class);
}
|
||||
|
||||
/**
 * Attach an existing context to the server.
 * @param ctxt the context handler to add
 * @param isFiltered if true, filters added via addFilter also apply to it
 * @throws IOException
 */
public void addContext(Context ctxt, boolean isFiltered)
    throws IOException {
  webServer.addHandler(ctxt);
  defaultContexts.put(ctxt, isFiltered);
}
|
||||
|
||||
/**
 * Add a context
 * @param pathSpec The path spec for the context
 * @param dir The directory containing the context
 * @param isFiltered if true, the servlet is added to the filter path mapping
 *        (note: the current implementation always registers the context as
 *        filtered — the flag is not forwarded)
 * @throws IOException
 */
protected void addContext(String pathSpec, String dir, boolean isFiltered) throws IOException {
  // Sanity check: the server must already have its root handler installed.
  if (0 == webServer.getHandlers().length) {
    throw new RuntimeException("Couldn't find handler");
  }
  WebAppContext webAppCtx = new WebAppContext();
  webAppCtx.setContextPath(pathSpec);
  webAppCtx.setWar(dir);
  addContext(webAppCtx, true);
}
|
||||
|
||||
/**
 * Set a value in the webapp context. These values are available to the jsp
 * pages as "application.getAttribute(name)".
 * @param name The name of the attribute
 * @param value The value of the attribute
 */
public void setAttribute(String name, Object value) {
  webAppContext.setAttribute(name, value);
}
|
||||
|
||||
/**
 * Add a servlet in the server.
 * In addition to registering the servlet, its path spec is added to every
 * previously-registered filter's mappings.
 * @param name The name of the servlet (can be passed as null)
 * @param pathSpec The path spec for the servlet
 * @param clazz The servlet class
 */
public void addServlet(String name, String pathSpec,
    Class<? extends HttpServlet> clazz) {
  addInternalServlet(name, pathSpec, clazz);
  addFilterPathMapping(pathSpec, webAppContext);
}
|
||||
|
||||
/**
 * Add an internal servlet in the server.
 * Unlike {@link #addServlet}, the path is NOT added to the filter
 * mappings, so filters do not see requests to it.
 * @param name The name of the servlet (can be passed as null)
 * @param pathSpec The path spec for the servlet
 * @param clazz The servlet class
 * @deprecated this is a temporary method
 */
@Deprecated
public void addInternalServlet(String name, String pathSpec,
    Class<? extends HttpServlet> clazz) {
  ServletHolder holder = new ServletHolder(clazz);
  if (name != null) {
    holder.setName(name);
  }
  webAppContext.addServlet(holder, pathSpec);
}
|
||||
|
||||
/** {@inheritDoc}
 * On the main webapp the filter is limited to user-facing URLs
 * (*.html, *.jsp); on each filtered default context it covers all URLs.
 * The name is remembered so later addServlet calls can extend the mapping.
 */
public void addFilter(String name, String classname,
    Map<String, String> parameters) {

  final String[] USER_FACING_URLS = { "*.html", "*.jsp" };
  defineFilter(webAppContext, name, classname, parameters, USER_FACING_URLS);
  final String[] ALL_URLS = { "/*" };
  for (Map.Entry<Context, Boolean> e : defaultContexts.entrySet()) {
    if (e.getValue()) {
      Context ctx = e.getKey();
      defineFilter(ctx, name, classname, parameters, ALL_URLS);
      LOG.info("Added filter " + name + " (class=" + classname
          + ") to context " + ctx.getDisplayName());
    }
  }
  filterNames.add(name);
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void addGlobalFilter(String name, String classname,
|
||||
Map<String, String> parameters) {
|
||||
final String[] ALL_URLS = { "/*" };
|
||||
defineFilter(webAppContext, name, classname, parameters, ALL_URLS);
|
||||
for (Context ctx : defaultContexts.keySet()) {
|
||||
defineFilter(ctx, name, classname, parameters, ALL_URLS);
|
||||
}
|
||||
LOG.info("Added global filter" + name + " (class=" + classname + ")");
|
||||
}
|
||||
|
||||
/**
 * Define a filter for a context and set up default url mappings.
 * Builds a FilterHolder for the class and maps it to the given url
 * patterns for all dispatch types.
 */
protected void defineFilter(Context ctx, String name,
    String classname, Map<String,String> parameters, String[] urls) {

  FilterHolder holder = new FilterHolder();
  holder.setName(name);
  holder.setClassName(classname);
  holder.setInitParameters(parameters);
  FilterMapping fmap = new FilterMapping();
  fmap.setPathSpecs(urls);
  fmap.setDispatches(Handler.ALL);
  fmap.setFilterName(name);
  ServletHandler handler = ctx.getServletHandler();
  handler.addFilter(holder, fmap);
}
|
||||
|
||||
/**
|
||||
* Add the path spec to the filter path mapping.
|
||||
* @param pathSpec The path spec
|
||||
* @param webAppCtx The WebApplicationContext to add to
|
||||
*/
|
||||
protected void addFilterPathMapping(String pathSpec,
|
||||
Context webAppCtx) {
|
||||
ServletHandler handler = webAppCtx.getServletHandler();
|
||||
for(String name : filterNames) {
|
||||
FilterMapping fmap = new FilterMapping();
|
||||
fmap.setPathSpec(pathSpec);
|
||||
fmap.setFilterName(name);
|
||||
fmap.setDispatches(Handler.ALL);
|
||||
handler.addFilterMapping(fmap);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value in the webapp context.
|
||||
* @param name The name of the attribute
|
||||
* @return The value of the attribute
|
||||
*/
|
||||
public Object getAttribute(String name) {
|
||||
return webAppContext.getAttribute(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the pathname to the webapps files.
|
||||
* @return the pathname as a URL
|
||||
* @throws IOException if 'webapps' directory cannot be found on CLASSPATH.
|
||||
*/
|
||||
protected String getWebAppsPath() throws IOException {
|
||||
URL url = getClass().getClassLoader().getResource("webapps");
|
||||
if (url == null)
|
||||
throw new IOException("webapps not found in CLASSPATH");
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the port that the server is on
|
||||
* @return the port
|
||||
*/
|
||||
public int getPort() {
|
||||
return webServer.getConnectors()[0].getLocalPort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the min, max number of worker threads (simultaneous connections).
|
||||
*/
|
||||
public void setThreads(int min, int max) {
|
||||
QueuedThreadPool pool = (QueuedThreadPool) webServer.getThreadPool() ;
|
||||
pool.setMinThreads(min);
|
||||
pool.setMaxThreads(max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure an ssl listener on the server.
|
||||
* @param addr address to listen on
|
||||
* @param keystore location of the keystore
|
||||
* @param storPass password for the keystore
|
||||
* @param keyPass password for the key
|
||||
* @deprecated Use {@link #addSslListener(InetSocketAddress, Configuration, boolean)}
|
||||
*/
|
||||
@Deprecated
|
||||
public void addSslListener(InetSocketAddress addr, String keystore,
|
||||
String storPass, String keyPass) throws IOException {
|
||||
if (webServer.isStarted()) {
|
||||
throw new IOException("Failed to add ssl listener");
|
||||
}
|
||||
SslSocketConnector sslListener = new SslSocketConnector();
|
||||
sslListener.setHost(addr.getHostName());
|
||||
sslListener.setPort(addr.getPort());
|
||||
sslListener.setKeystore(keystore);
|
||||
sslListener.setPassword(storPass);
|
||||
sslListener.setKeyPassword(keyPass);
|
||||
webServer.addConnector(sslListener);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure an ssl listener on the server.
|
||||
* @param addr address to listen on
|
||||
* @param sslConf conf to retrieve ssl options
|
||||
* @param needClientAuth whether client authentication is required
|
||||
*/
|
||||
public void addSslListener(InetSocketAddress addr, Configuration sslConf,
|
||||
boolean needClientAuth) throws IOException {
|
||||
if (webServer.isStarted()) {
|
||||
throw new IOException("Failed to add ssl listener");
|
||||
}
|
||||
if (needClientAuth) {
|
||||
// setting up SSL truststore for authenticating clients
|
||||
System.setProperty("javax.net.ssl.trustStore", sslConf.get(
|
||||
"ssl.server.truststore.location", ""));
|
||||
System.setProperty("javax.net.ssl.trustStorePassword", sslConf.get(
|
||||
"ssl.server.truststore.password", ""));
|
||||
System.setProperty("javax.net.ssl.trustStoreType", sslConf.get(
|
||||
"ssl.server.truststore.type", "jks"));
|
||||
}
|
||||
SslSocketConnector sslListener = new SslSocketConnector();
|
||||
sslListener.setHost(addr.getHostName());
|
||||
sslListener.setPort(addr.getPort());
|
||||
sslListener.setKeystore(sslConf.get("ssl.server.keystore.location"));
|
||||
sslListener.setPassword(sslConf.get("ssl.server.keystore.password", ""));
|
||||
sslListener.setKeyPassword(sslConf.get("ssl.server.keystore.keypassword", ""));
|
||||
sslListener.setKeystoreType(sslConf.get("ssl.server.keystore.type", "jks"));
|
||||
sslListener.setNeedClientAuth(needClientAuth);
|
||||
webServer.addConnector(sslListener);
|
||||
}
|
||||
|
||||
  /**
   * Start the server. Does not wait for the server to start.
   *
   * Binds the primary listener, retrying on the next port when findPort is
   * set and the requested port is taken, then starts Jetty. Contains a
   * retry/bounce workaround for listeners that report a negative local port
   * (HADOOP-4744).
   *
   * @throws IOException if the server cannot be started; a BindException is
   *         rethrown as-is when findPort is false, and any non-IOException
   *         failure is wrapped in an IOException
   */
  public void start() throws IOException {
    try {
      int port = 0;
      int oriPort = listener.getPort(); // The original requested port
      // Outer loop: one iteration per candidate port until Jetty starts.
      while (true) {
        try {
          port = webServer.getConnectors()[0].getLocalPort();
          LOG.info("Port returned by webServer.getConnectors()[0]." +
              "getLocalPort() before open() is "+ port +
              ". Opening the listener on " + oriPort);
          listener.open();
          port = listener.getLocalPort();
          LOG.info("listener.getLocalPort() returned " + listener.getLocalPort() +
              " webServer.getConnectors()[0].getLocalPort() returned " +
              webServer.getConnectors()[0].getLocalPort());
          //Workaround to handle the problem reported in HADOOP-4744
          // A freshly opened listener can transiently report a negative
          // port; poll it, and if it stays negative, close and reopen
          // ("bounce") the listener on a shifted port.
          if (port < 0) {
            Thread.sleep(100);
            int numRetries = 1;
            while (port < 0) {
              LOG.warn("listener.getLocalPort returned " + port);
              if (numRetries++ > MAX_RETRIES) {
                throw new Exception(" listener.getLocalPort is returning " +
                    "less than 0 even after " +numRetries+" resets");
              }
              // Poll twice with a short delay before resorting to a bounce.
              for (int i = 0; i < 2; i++) {
                LOG.info("Retrying listener.getLocalPort()");
                port = listener.getLocalPort();
                if (port > 0) {
                  break;
                }
                Thread.sleep(200);
              }
              if (port > 0) {
                break;
              }
              LOG.info("Bouncing the listener");
              listener.close();
              Thread.sleep(1000);
              // Port 0 means "ephemeral", so keep it; otherwise shift up by
              // one for the reopen attempt.
              listener.setPort(oriPort == 0 ? 0 : (oriPort += 1));
              listener.open();
              Thread.sleep(100);
              port = listener.getLocalPort();
            }
          } //Workaround end
          LOG.info("Jetty bound to port " + port);
          webServer.start();
          break;
        } catch (IOException ex) {
          // if this is a bind exception,
          // then try the next port number.
          if (ex instanceof BindException) {
            if (!findPort) {
              throw (BindException) ex;
            }
            // findPort is set: fall through to the port increment below.
          } else {
            LOG.info("HttpServer.start() threw a non Bind IOException");
            throw ex;
          }
        } catch (MultiException ex) {
          LOG.info("HttpServer.start() threw a MultiException");
          throw ex;
        }
        // Bind failed and findPort is enabled: advance to the next port.
        listener.setPort((oriPort += 1));
      }
    } catch (IOException e) {
      throw e;
    } catch (Exception e) {
      // Wrap InterruptedException and the retry-exhaustion Exception above.
      throw new IOException("Problem starting http server", e);
    }
  }
|
||||
|
||||
/**
|
||||
* stop the server
|
||||
*/
|
||||
public void stop() throws Exception {
|
||||
listener.close();
|
||||
webServer.stop();
|
||||
}
|
||||
|
||||
  /**
   * Block the calling thread until the web server has finished running.
   * @throws InterruptedException if the wait is interrupted
   */
  public void join() throws InterruptedException {
    webServer.join();
  }
|
||||
|
||||
/**
|
||||
* A very simple servlet to serve up a text representation of the current
|
||||
* stack traces. It both returns the stacks to the caller and logs them.
|
||||
* Currently the stack traces are done sequentially rather than exactly the
|
||||
* same data.
|
||||
*/
|
||||
public static class StackServlet extends HttpServlet {
|
||||
private static final long serialVersionUID = -6284183679759467039L;
|
||||
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
PrintWriter out = new PrintWriter(response.getOutputStream());
|
||||
ReflectionUtils.printThreadInfo(out, "");
|
||||
out.close();
|
||||
ReflectionUtils.logThreadInfo(LOG, "jsp requested", 1);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,207 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* Abstract base class for MapWritable and SortedMapWritable
|
||||
*
|
||||
* Unlike org.apache.nutch.crawl.MapWritable, this class allows creation of
|
||||
* MapWritable<Writable, MapWritable> so the CLASS_TO_ID and ID_TO_CLASS
|
||||
* maps travel with the class instead of being static.
|
||||
*
|
||||
* Class ids range from 1 to 127 so there can be at most 127 distinct classes
|
||||
* in any specific map instance.
|
||||
*/
|
||||
public abstract class AbstractMapWritable implements Writable, Configurable {
|
||||
private AtomicReference<Configuration> conf;
|
||||
|
||||
/* Class to id mappings */
|
||||
private Map<Class, Byte> classToIdMap = new ConcurrentHashMap<Class, Byte>();
|
||||
|
||||
/* Id to Class mappings */
|
||||
private Map<Byte, Class> idToClassMap = new ConcurrentHashMap<Byte, Class>();
|
||||
|
||||
/* The number of new classes (those not established by the constructor) */
|
||||
private volatile byte newClasses = 0;
|
||||
|
||||
/** @return the number of known classes */
|
||||
byte getNewClasses() {
|
||||
return newClasses;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to add "predefined" classes and by Writable to copy "new" classes.
|
||||
*/
|
||||
private synchronized void addToMap(Class clazz, byte id) {
|
||||
if (classToIdMap.containsKey(clazz)) {
|
||||
byte b = classToIdMap.get(clazz);
|
||||
if (b != id) {
|
||||
throw new IllegalArgumentException ("Class " + clazz.getName() +
|
||||
" already registered but maps to " + b + " and not " + id);
|
||||
}
|
||||
}
|
||||
if (idToClassMap.containsKey(id)) {
|
||||
Class c = idToClassMap.get(id);
|
||||
if (!c.equals(clazz)) {
|
||||
throw new IllegalArgumentException("Id " + id + " exists but maps to " +
|
||||
c.getName() + " and not " + clazz.getName());
|
||||
}
|
||||
}
|
||||
classToIdMap.put(clazz, id);
|
||||
idToClassMap.put(id, clazz);
|
||||
}
|
||||
|
||||
/** Add a Class to the maps if it is not already present. */
|
||||
protected synchronized void addToMap(Class clazz) {
|
||||
if (classToIdMap.containsKey(clazz)) {
|
||||
return;
|
||||
}
|
||||
if (newClasses + 1 > Byte.MAX_VALUE) {
|
||||
throw new IndexOutOfBoundsException("adding an additional class would" +
|
||||
" exceed the maximum number allowed");
|
||||
}
|
||||
byte id = ++newClasses;
|
||||
addToMap(clazz, id);
|
||||
}
|
||||
|
||||
/** @return the Class class for the specified id */
|
||||
protected Class getClass(byte id) {
|
||||
return idToClassMap.get(id);
|
||||
}
|
||||
|
||||
/** @return the id for the specified Class */
|
||||
protected byte getId(Class clazz) {
|
||||
return classToIdMap.containsKey(clazz) ? classToIdMap.get(clazz) : -1;
|
||||
}
|
||||
|
||||
/** Used by child copy constructors. */
|
||||
protected synchronized void copy(Writable other) {
|
||||
if (other != null) {
|
||||
try {
|
||||
DataOutputBuffer out = new DataOutputBuffer();
|
||||
other.write(out);
|
||||
DataInputBuffer in = new DataInputBuffer();
|
||||
in.reset(out.getData(), out.getLength());
|
||||
readFields(in);
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("map cannot be copied: " +
|
||||
e.getMessage());
|
||||
}
|
||||
|
||||
} else {
|
||||
throw new IllegalArgumentException("source map cannot be null");
|
||||
}
|
||||
}
|
||||
|
||||
/** constructor. */
|
||||
protected AbstractMapWritable() {
|
||||
this.conf = new AtomicReference<Configuration>();
|
||||
|
||||
addToMap(ArrayWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-127).byteValue()));
|
||||
addToMap(BooleanWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-126).byteValue()));
|
||||
addToMap(BytesWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-125).byteValue()));
|
||||
addToMap(FloatWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-124).byteValue()));
|
||||
addToMap(IntWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-123).byteValue()));
|
||||
addToMap(LongWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-122).byteValue()));
|
||||
addToMap(MapWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-121).byteValue()));
|
||||
addToMap(MD5Hash.class,
|
||||
Byte.valueOf(Integer.valueOf(-120).byteValue()));
|
||||
addToMap(NullWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-119).byteValue()));
|
||||
addToMap(ObjectWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-118).byteValue()));
|
||||
addToMap(SortedMapWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-117).byteValue()));
|
||||
addToMap(Text.class,
|
||||
Byte.valueOf(Integer.valueOf(-116).byteValue()));
|
||||
addToMap(TwoDArrayWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-115).byteValue()));
|
||||
|
||||
// UTF8 is deprecated so we don't support it
|
||||
|
||||
addToMap(VIntWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-114).byteValue()));
|
||||
addToMap(VLongWritable.class,
|
||||
Byte.valueOf(Integer.valueOf(-113).byteValue()));
|
||||
|
||||
}
|
||||
|
||||
/** @return the conf */
|
||||
public Configuration getConf() {
|
||||
return conf.get();
|
||||
}
|
||||
|
||||
/** @param conf the conf to set */
|
||||
public void setConf(Configuration conf) {
|
||||
this.conf.set(conf);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
|
||||
// First write out the size of the class table and any classes that are
|
||||
// "unknown" classes
|
||||
|
||||
out.writeByte(newClasses);
|
||||
|
||||
for (byte i = 1; i <= newClasses; i++) {
|
||||
out.writeByte(i);
|
||||
out.writeUTF(getClass(i).getName());
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
|
||||
// Get the number of "unknown" classes
|
||||
|
||||
newClasses = in.readByte();
|
||||
|
||||
// Then read in the class names and add them to our tables
|
||||
|
||||
for (int i = 0; i < newClasses; i++) {
|
||||
byte id = in.readByte();
|
||||
String className = in.readUTF();
|
||||
try {
|
||||
addToMap(Class.forName(className), id);
|
||||
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new IOException("can't find class: " + className + " because "+
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.apache.hadoop.conf.*;
|
||||
import org.apache.hadoop.util.*;
|
||||
import org.apache.hadoop.io.SequenceFile.CompressionType;
|
||||
|
||||
|
||||
/** A dense file-based mapping from integers to values. */
|
||||
public class ArrayFile extends MapFile {
|
||||
|
||||
protected ArrayFile() {} // no public ctor
|
||||
|
||||
/** Write a new array file. */
|
||||
public static class Writer extends MapFile.Writer {
|
||||
private LongWritable count = new LongWritable(0);
|
||||
|
||||
/** Create the named file for values of the named class. */
|
||||
public Writer(Configuration conf, FileSystem fs,
|
||||
String file, Class<? extends Writable> valClass)
|
||||
throws IOException {
|
||||
super(conf, fs, file, LongWritable.class, valClass);
|
||||
}
|
||||
|
||||
/** Create the named file for values of the named class. */
|
||||
public Writer(Configuration conf, FileSystem fs,
|
||||
String file, Class<? extends Writable> valClass,
|
||||
CompressionType compress, Progressable progress)
|
||||
throws IOException {
|
||||
super(conf, fs, file, LongWritable.class, valClass, compress, progress);
|
||||
}
|
||||
|
||||
/** Append a value to the file. */
|
||||
public synchronized void append(Writable value) throws IOException {
|
||||
super.append(count, value); // add to map
|
||||
count.set(count.get()+1); // increment count
|
||||
}
|
||||
}
|
||||
|
||||
/** Provide access to an existing array file. */
|
||||
public static class Reader extends MapFile.Reader {
|
||||
private LongWritable key = new LongWritable();
|
||||
|
||||
/** Construct an array reader for the named file.*/
|
||||
public Reader(FileSystem fs, String file, Configuration conf) throws IOException {
|
||||
super(fs, file, conf);
|
||||
}
|
||||
|
||||
/** Positions the reader before its <code>n</code>th value. */
|
||||
public synchronized void seek(long n) throws IOException {
|
||||
key.set(n);
|
||||
seek(key);
|
||||
}
|
||||
|
||||
/** Read and return the next value in the file. */
|
||||
public synchronized Writable next(Writable value) throws IOException {
|
||||
return next(key, value) ? value : null;
|
||||
}
|
||||
|
||||
/** Returns the key associated with the most recent call to {@link
|
||||
* #seek(long)}, {@link #next(Writable)}, or {@link
|
||||
* #get(long,Writable)}. */
|
||||
public synchronized long key() throws IOException {
|
||||
return key.get();
|
||||
}
|
||||
|
||||
/** Return the <code>n</code>th value in the file. */
|
||||
public synchronized Writable get(long n, Writable value)
|
||||
throws IOException {
|
||||
key.set(n);
|
||||
return get(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Array;
|
||||
|
||||
/**
|
||||
* A Writable for arrays containing instances of a class. The elements of this
|
||||
* writable must all be instances of the same class. If this writable will be
|
||||
* the input for a Reducer, you will need to create a subclass that sets the
|
||||
* value to be of the proper type.
|
||||
*
|
||||
* For example:
|
||||
* <code>
|
||||
* public class IntArrayWritable extends ArrayWritable {
|
||||
* public IntArrayWritable() {
|
||||
* super(IntWritable.class);
|
||||
* }
|
||||
* }
|
||||
* </code>
|
||||
*/
|
||||
public class ArrayWritable implements Writable {
|
||||
private Class<? extends Writable> valueClass;
|
||||
private Writable[] values;
|
||||
|
||||
public ArrayWritable(Class<? extends Writable> valueClass) {
|
||||
if (valueClass == null) {
|
||||
throw new IllegalArgumentException("null valueClass");
|
||||
}
|
||||
this.valueClass = valueClass;
|
||||
}
|
||||
|
||||
public ArrayWritable(Class<? extends Writable> valueClass, Writable[] values) {
|
||||
this(valueClass);
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public ArrayWritable(String[] strings) {
|
||||
this(UTF8.class, new Writable[strings.length]);
|
||||
for (int i = 0; i < strings.length; i++) {
|
||||
values[i] = new UTF8(strings[i]);
|
||||
}
|
||||
}
|
||||
|
||||
public Class getValueClass() {
|
||||
return valueClass;
|
||||
}
|
||||
|
||||
public String[] toStrings() {
|
||||
String[] strings = new String[values.length];
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
strings[i] = values[i].toString();
|
||||
}
|
||||
return strings;
|
||||
}
|
||||
|
||||
public Object toArray() {
|
||||
Object result = Array.newInstance(valueClass, values.length);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
Array.set(result, i, values[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public void set(Writable[] values) { this.values = values; }
|
||||
|
||||
public Writable[] get() { return values; }
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
values = new Writable[in.readInt()]; // construct values
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
Writable value = WritableFactories.newInstance(valueClass);
|
||||
value.readFields(in); // read a value
|
||||
values[i] = value; // store it in values
|
||||
}
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeInt(values.length); // write values
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i].write(out);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
/**
|
||||
* Interface supported by {@link org.apache.hadoop.io.WritableComparable}
|
||||
* types supporting ordering/permutation by a representative set of bytes.
|
||||
*/
|
||||
public abstract class BinaryComparable implements Comparable<BinaryComparable> {
|
||||
|
||||
/**
|
||||
* Return n st bytes 0..n-1 from {#getBytes()} are valid.
|
||||
*/
|
||||
public abstract int getLength();
|
||||
|
||||
/**
|
||||
* Return representative byte array for this instance.
|
||||
*/
|
||||
public abstract byte[] getBytes();
|
||||
|
||||
/**
|
||||
* Compare bytes from {#getBytes()}.
|
||||
* @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int)
|
||||
*/
|
||||
public int compareTo(BinaryComparable other) {
|
||||
if (this == other)
|
||||
return 0;
|
||||
return WritableComparator.compareBytes(getBytes(), 0, getLength(),
|
||||
other.getBytes(), 0, other.getLength());
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare bytes from {#getBytes()} to those provided.
|
||||
*/
|
||||
public int compareTo(byte[] other, int off, int len) {
|
||||
return WritableComparator.compareBytes(getBytes(), 0, getLength(),
|
||||
other, off, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if bytes from {#getBytes()} match.
|
||||
*/
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof BinaryComparable))
|
||||
return false;
|
||||
BinaryComparable that = (BinaryComparable)other;
|
||||
if (this.getLength() != that.getLength())
|
||||
return false;
|
||||
return this.compareTo(that) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a hash of the bytes returned from {#getBytes()}.
|
||||
* @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int)
|
||||
*/
|
||||
public int hashCode() {
|
||||
return WritableComparator.hashBytes(getBytes(), getLength());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,259 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile.CompressionType;
|
||||
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.util.bloom.DynamicBloomFilter;
|
||||
import org.apache.hadoop.util.bloom.Filter;
|
||||
import org.apache.hadoop.util.bloom.Key;
|
||||
import org.apache.hadoop.util.hash.Hash;
|
||||
|
||||
/**
|
||||
* This class extends {@link MapFile} and provides very much the same
|
||||
* functionality. However, it uses dynamic Bloom filters to provide
|
||||
* quick membership test for keys, and it offers a fast version of
|
||||
* {@link Reader#get(WritableComparable, Writable)} operation, especially in
|
||||
* case of sparsely populated MapFile-s.
|
||||
*/
|
||||
public class BloomMapFile {
|
||||
private static final Log LOG = LogFactory.getLog(BloomMapFile.class);
|
||||
public static final String BLOOM_FILE_NAME = "bloom";
|
||||
public static final int HASH_COUNT = 5;
|
||||
|
||||
  /**
   * Delete the named map directory and its constituent files: the MapFile
   * data and index files plus the bloom filter file.
   * @param fs the filesystem holding the map
   * @param name path of the map directory to remove
   * @throws IOException if a delete operation fails
   */
  public static void delete(FileSystem fs, String name) throws IOException {
    Path dir = new Path(name);
    Path data = new Path(dir, MapFile.DATA_FILE_NAME);
    Path index = new Path(dir, MapFile.INDEX_FILE_NAME);
    Path bloom = new Path(dir, BLOOM_FILE_NAME);

    // Remove each constituent file, then the (now empty) directory itself.
    fs.delete(data, true);
    fs.delete(index, true);
    fs.delete(bloom, true);
    fs.delete(dir, true);
  }
|
||||
|
||||
public static class Writer extends MapFile.Writer {
|
||||
private DynamicBloomFilter bloomFilter;
|
||||
private int numKeys;
|
||||
private int vectorSize;
|
||||
private Key bloomKey = new Key();
|
||||
private DataOutputBuffer buf = new DataOutputBuffer();
|
||||
private FileSystem fs;
|
||||
private Path dir;
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
Class<? extends WritableComparable> keyClass,
|
||||
Class<? extends Writable> valClass, CompressionType compress,
|
||||
CompressionCodec codec, Progressable progress) throws IOException {
|
||||
super(conf, fs, dirName, keyClass, valClass, compress, codec, progress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
Class<? extends WritableComparable> keyClass,
|
||||
Class valClass, CompressionType compress,
|
||||
Progressable progress) throws IOException {
|
||||
super(conf, fs, dirName, keyClass, valClass, compress, progress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
Class<? extends WritableComparable> keyClass,
|
||||
Class valClass, CompressionType compress)
|
||||
throws IOException {
|
||||
super(conf, fs, dirName, keyClass, valClass, compress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
WritableComparator comparator, Class valClass,
|
||||
CompressionType compress, CompressionCodec codec, Progressable progress)
|
||||
throws IOException {
|
||||
super(conf, fs, dirName, comparator, valClass, compress, codec, progress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
WritableComparator comparator, Class valClass,
|
||||
CompressionType compress, Progressable progress) throws IOException {
|
||||
super(conf, fs, dirName, comparator, valClass, compress, progress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
WritableComparator comparator, Class valClass, CompressionType compress)
|
||||
throws IOException {
|
||||
super(conf, fs, dirName, comparator, valClass, compress);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
WritableComparator comparator, Class valClass) throws IOException {
|
||||
super(conf, fs, dirName, comparator, valClass);
|
||||
this.fs = fs;
|
||||
this.dir = new Path(dirName);
|
||||
initBloomFilter(conf);
|
||||
}
|
||||
|
||||
/**
 * Creates an uncompressed BloomMapFile writer using the keys' natural order.
 * @param conf job configuration; also supplies the Bloom filter sizing keys
 * @param fs filesystem to create the map file on
 * @param dirName name of the output map-file directory
 * @param keyClass class of the keys to be written
 * @param valClass class of the values to be written
 * @throws IOException if the underlying MapFile writer cannot be created
 */
public Writer(Configuration conf, FileSystem fs, String dirName,
              Class<? extends WritableComparable> keyClass,
              Class valClass) throws IOException {
  super(conf, fs, dirName, keyClass, valClass);
  this.fs = fs;
  this.dir = new Path(dirName);
  initBloomFilter(conf);
}
|
||||
|
||||
private synchronized void initBloomFilter(Configuration conf) {
|
||||
numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
|
||||
// vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
|
||||
// single key, where <code> is the number of hash functions,
|
||||
// <code>n</code> is the number of keys and <code>c</code> is the desired
|
||||
// max. error rate.
|
||||
// Our desired error rate is by default 0.005, i.e. 0.5%
|
||||
float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
|
||||
vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) /
|
||||
Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT)));
|
||||
bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
|
||||
Hash.getHashType(conf), numKeys);
|
||||
}
|
||||
|
||||
/**
 * Appends a key/value pair to the map file, and also records the key's
 * serialized bytes in the in-memory Bloom filter.
 * @param key key to append (also added to the membership filter)
 * @param val value to append
 * @throws IOException if the underlying MapFile append fails
 */
@Override
public synchronized void append(WritableComparable key, Writable val)
  throws IOException {
  super.append(key, val);
  buf.reset();
  key.write(buf);
  // NOTE(review): buf.getData() returns the whole backing array, which may
  // be longer than the serialized key and contain stale bytes; the reader
  // hashes the same way, but a mismatch in padding between writer and
  // reader could cause false negatives — confirm against the bloom Key API
  // (any fix must change writer and reader in lockstep).
  // NOTE(review): 1.0 appears to be the filter key's weight — confirm.
  bloomKey.set(buf.getData(), 1.0);
  bloomFilter.add(bloomKey);
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() throws IOException {
|
||||
super.close();
|
||||
DataOutputStream out = fs.create(new Path(dir, BLOOM_FILE_NAME), true);
|
||||
bloomFilter.write(out);
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class Reader extends MapFile.Reader {
|
||||
private DynamicBloomFilter bloomFilter;
|
||||
private DataOutputBuffer buf = new DataOutputBuffer();
|
||||
private Key bloomKey = new Key();
|
||||
|
||||
public Reader(FileSystem fs, String dirName, Configuration conf)
|
||||
throws IOException {
|
||||
super(fs, dirName, conf);
|
||||
initBloomFilter(fs, dirName, conf);
|
||||
}
|
||||
|
||||
public Reader(FileSystem fs, String dirName, WritableComparator comparator,
|
||||
Configuration conf, boolean open) throws IOException {
|
||||
super(fs, dirName, comparator, conf, open);
|
||||
initBloomFilter(fs, dirName, conf);
|
||||
}
|
||||
|
||||
public Reader(FileSystem fs, String dirName, WritableComparator comparator,
|
||||
Configuration conf) throws IOException {
|
||||
super(fs, dirName, comparator, conf);
|
||||
initBloomFilter(fs, dirName, conf);
|
||||
}
|
||||
|
||||
private void initBloomFilter(FileSystem fs, String dirName,
|
||||
Configuration conf) {
|
||||
try {
|
||||
DataInputStream in = fs.open(new Path(dirName, BLOOM_FILE_NAME));
|
||||
bloomFilter = new DynamicBloomFilter();
|
||||
bloomFilter.readFields(in);
|
||||
in.close();
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
|
||||
bloomFilter = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if this MapFile has the indicated key. The membership test is
|
||||
* performed using a Bloom filter, so the result has always non-zero
|
||||
* probability of false positives.
|
||||
* @param key key to check
|
||||
* @return false iff key doesn't exist, true if key probably exists.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean probablyHasKey(WritableComparable key) throws IOException {
|
||||
if (bloomFilter == null) {
|
||||
return true;
|
||||
}
|
||||
buf.reset();
|
||||
key.write(buf);
|
||||
bloomKey.set(buf.getData(), 1.0);
|
||||
return bloomFilter.membershipTest(bloomKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fast version of the
|
||||
* {@link MapFile.Reader#get(WritableComparable, Writable)} method. First
|
||||
* it checks the Bloom filter for the existence of the key, and only if
|
||||
* present it performs the real get operation. This yields significant
|
||||
* performance improvements for get operations on sparsely populated files.
|
||||
*/
|
||||
@Override
|
||||
public synchronized Writable get(WritableComparable key, Writable val)
|
||||
throws IOException {
|
||||
if (!probablyHasKey(key)) {
|
||||
return null;
|
||||
}
|
||||
return super.get(key, val);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the Bloom filter used by this instance of the Reader.
|
||||
* @return a Bloom filter (see {@link Filter})
|
||||
*/
|
||||
public Filter getBloomFilter() {
|
||||
return bloomFilter;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* A WritableComparable for booleans.
|
||||
*/
|
||||
public class BooleanWritable implements WritableComparable {
|
||||
private boolean value;
|
||||
|
||||
/**
|
||||
*/
|
||||
public BooleanWritable() {};
|
||||
|
||||
/**
|
||||
*/
|
||||
public BooleanWritable(boolean value) {
|
||||
set(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value of the BooleanWritable
|
||||
*/
|
||||
public void set(boolean value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value of the BooleanWritable
|
||||
*/
|
||||
public boolean get() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
*/
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
value = in.readBoolean();
|
||||
}
|
||||
|
||||
/**
|
||||
*/
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeBoolean(value);
|
||||
}
|
||||
|
||||
/**
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof BooleanWritable)) {
|
||||
return false;
|
||||
}
|
||||
BooleanWritable other = (BooleanWritable) o;
|
||||
return this.value == other.value;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return value ? 0 : 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*/
|
||||
public int compareTo(Object o) {
|
||||
boolean a = this.value;
|
||||
boolean b = ((BooleanWritable) o).value;
|
||||
return ((a == b) ? 0 : (a == false) ? -1 : 1);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return Boolean.toString(get());
|
||||
}
|
||||
|
||||
/**
|
||||
* A Comparator optimized for BooleanWritable.
|
||||
*/
|
||||
public static class Comparator extends WritableComparator {
|
||||
public Comparator() {
|
||||
super(BooleanWritable.class);
|
||||
}
|
||||
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
return compareBytes(b1, s1, l1, b2, s2, l2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static {
|
||||
WritableComparator.define(BooleanWritable.class, new Comparator());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** A WritableComparable for a single byte. */
|
||||
public class ByteWritable implements WritableComparable {
|
||||
private byte value;
|
||||
|
||||
public ByteWritable() {}
|
||||
|
||||
public ByteWritable(byte value) { set(value); }
|
||||
|
||||
/** Set the value of this ByteWritable. */
|
||||
public void set(byte value) { this.value = value; }
|
||||
|
||||
/** Return the value of this ByteWritable. */
|
||||
public byte get() { return value; }
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
value = in.readByte();
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeByte(value);
|
||||
}
|
||||
|
||||
/** Returns true iff <code>o</code> is a ByteWritable with the same value. */
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof ByteWritable)) {
|
||||
return false;
|
||||
}
|
||||
ByteWritable other = (ByteWritable)o;
|
||||
return this.value == other.value;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return (int)value;
|
||||
}
|
||||
|
||||
/** Compares two ByteWritables. */
|
||||
public int compareTo(Object o) {
|
||||
int thisValue = this.value;
|
||||
int thatValue = ((ByteWritable)o).value;
|
||||
return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return Byte.toString(value);
|
||||
}
|
||||
|
||||
/** A Comparator optimized for ByteWritable. */
|
||||
public static class Comparator extends WritableComparator {
|
||||
public Comparator() {
|
||||
super(ByteWritable.class);
|
||||
}
|
||||
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
byte thisValue = b1[s1];
|
||||
byte thatValue = b2[s2];
|
||||
return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
|
||||
}
|
||||
}
|
||||
|
||||
static { // register this comparator
|
||||
WritableComparator.define(ByteWritable.class, new Comparator());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* A byte sequence that is usable as a key or value.
|
||||
* It is resizable and distinguishes between the size of the seqeunce and
|
||||
* the current capacity. The hash function is the front of the md5 of the
|
||||
* buffer. The sort order is the same as memcmp.
|
||||
*/
|
||||
public class BytesWritable extends BinaryComparable
|
||||
implements WritableComparable<BinaryComparable> {
|
||||
private static final Log LOG = LogFactory.getLog(BytesWritable.class);
|
||||
private static final int LENGTH_BYTES = 4;
|
||||
private static final byte[] EMPTY_BYTES = {};
|
||||
|
||||
private int size;
|
||||
private byte[] bytes;
|
||||
|
||||
/**
|
||||
* Create a zero-size sequence.
|
||||
*/
|
||||
public BytesWritable() {this(EMPTY_BYTES);}
|
||||
|
||||
/**
|
||||
* Create a BytesWritable using the byte array as the initial value.
|
||||
* @param bytes This array becomes the backing storage for the object.
|
||||
*/
|
||||
public BytesWritable(byte[] bytes) {
|
||||
this.bytes = bytes;
|
||||
this.size = bytes.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the data from the BytesWritable.
|
||||
* @return The data is only valid between 0 and getLength() - 1.
|
||||
*/
|
||||
public byte[] getBytes() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the data from the BytesWritable.
|
||||
* @deprecated Use {@link #getBytes()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public byte[] get() {
|
||||
return getBytes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current size of the buffer.
|
||||
*/
|
||||
public int getLength() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current size of the buffer.
|
||||
* @deprecated Use {@link #getLength()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public int getSize() {
|
||||
return getLength();
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the size of the buffer. The values in the old range are preserved
|
||||
* and any new values are undefined. The capacity is changed if it is
|
||||
* necessary.
|
||||
* @param size The new number of bytes
|
||||
*/
|
||||
public void setSize(int size) {
|
||||
if (size > getCapacity()) {
|
||||
setCapacity(size * 3 / 2);
|
||||
}
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the capacity, which is the maximum size that could handled without
|
||||
* resizing the backing storage.
|
||||
* @return The number of bytes
|
||||
*/
|
||||
public int getCapacity() {
|
||||
return bytes.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the capacity of the backing storage.
|
||||
* The data is preserved.
|
||||
* @param new_cap The new capacity in bytes.
|
||||
*/
|
||||
public void setCapacity(int new_cap) {
|
||||
if (new_cap != getCapacity()) {
|
||||
byte[] new_data = new byte[new_cap];
|
||||
if (new_cap < size) {
|
||||
size = new_cap;
|
||||
}
|
||||
if (size != 0) {
|
||||
System.arraycopy(bytes, 0, new_data, 0, size);
|
||||
}
|
||||
bytes = new_data;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the BytesWritable to the contents of the given newData.
|
||||
* @param newData the value to set this BytesWritable to.
|
||||
*/
|
||||
public void set(BytesWritable newData) {
|
||||
set(newData.bytes, 0, newData.size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value to a copy of the given byte range
|
||||
* @param newData the new values to copy in
|
||||
* @param offset the offset in newData to start at
|
||||
* @param length the number of bytes to copy
|
||||
*/
|
||||
public void set(byte[] newData, int offset, int length) {
|
||||
setSize(0);
|
||||
setSize(length);
|
||||
System.arraycopy(newData, offset, bytes, 0, size);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
setSize(0); // clear the old data
|
||||
setSize(in.readInt());
|
||||
in.readFully(bytes, 0, size);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeInt(size);
|
||||
out.write(bytes, 0, size);
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Are the two byte sequences equal?
|
||||
*/
|
||||
public boolean equals(Object right_obj) {
|
||||
if (right_obj instanceof BytesWritable)
|
||||
return super.equals(right_obj);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the stream of bytes as hex pairs separated by ' '.
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer(3*size);
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
// if not the first, put a blank separator in
|
||||
if (idx != 0) {
|
||||
sb.append(' ');
|
||||
}
|
||||
String num = Integer.toHexString(0xff & bytes[idx]);
|
||||
// if it is only one digit, add a leading 0.
|
||||
if (num.length() < 2) {
|
||||
sb.append('0');
|
||||
}
|
||||
sb.append(num);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** A Comparator optimized for BytesWritable. */
|
||||
public static class Comparator extends WritableComparator {
|
||||
public Comparator() {
|
||||
super(BytesWritable.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare the buffers in serialized form.
|
||||
*/
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
|
||||
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static { // register this comparator
|
||||
WritableComparator.define(BytesWritable.class, new Comparator());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
/**
 * Empty marker interface kept only for backward compatibility; it adds
 * nothing beyond its superinterface.
 * @deprecated use {@link java.io.Closeable} directly instead.
 */
@Deprecated
public interface Closeable extends java.io.Closeable{
}
|
|
@ -0,0 +1,86 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.DeflaterOutputStream;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
|
||||
/** A base-class for Writables which store themselves compressed and lazily
|
||||
* inflate on field access. This is useful for large objects whose fields are
|
||||
* not be altered during a map or reduce operation: leaving the field data
|
||||
* compressed makes copying the instance from one file to another much
|
||||
* faster. */
|
||||
public abstract class CompressedWritable implements Writable {
|
||||
// if non-null, the compressed field data of this instance.
|
||||
private byte[] compressed;
|
||||
|
||||
public CompressedWritable() {}
|
||||
|
||||
public final void readFields(DataInput in) throws IOException {
|
||||
compressed = new byte[in.readInt()];
|
||||
in.readFully(compressed, 0, compressed.length);
|
||||
}
|
||||
|
||||
/** Must be called by all methods which access fields to ensure that the data
|
||||
* has been uncompressed. */
|
||||
protected void ensureInflated() {
|
||||
if (compressed != null) {
|
||||
try {
|
||||
ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
|
||||
DataInput inflater =
|
||||
new DataInputStream(new InflaterInputStream(deflated));
|
||||
readFieldsCompressed(inflater);
|
||||
compressed = null;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Subclasses implement this instead of {@link #readFields(DataInput)}. */
|
||||
protected abstract void readFieldsCompressed(DataInput in)
|
||||
throws IOException;
|
||||
|
||||
public final void write(DataOutput out) throws IOException {
|
||||
if (compressed == null) {
|
||||
ByteArrayOutputStream deflated = new ByteArrayOutputStream();
|
||||
Deflater deflater = new Deflater(Deflater.BEST_SPEED);
|
||||
DataOutputStream dout =
|
||||
new DataOutputStream(new DeflaterOutputStream(deflated, deflater));
|
||||
writeCompressed(dout);
|
||||
dout.close();
|
||||
deflater.end();
|
||||
compressed = deflated.toByteArray();
|
||||
}
|
||||
out.writeInt(compressed.length);
|
||||
out.write(compressed);
|
||||
}
|
||||
|
||||
/** Subclasses implement this instead of {@link #write(DataOutput)}. */
|
||||
protected abstract void writeCompressed(DataOutput out) throws IOException;
|
||||
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** A reusable {@link DataInput} implementation that reads from an in-memory
 * buffer.
 *
 * <p>Re-pointing one instance at successive byte arrays avoids allocating a
 * fresh DataInputStream/ByteArrayInputStream pair for every record read:
 * <pre>
 *   DataInputBuffer buffer = new DataInputBuffer();
 *   while (... more records ...) {
 *     buffer.reset(data, dataLength);
 *     ... read via DataInput methods ...
 *   }
 * </pre>
 */
public class DataInputBuffer extends DataInputStream {

  /** ByteArrayInputStream whose backing array and cursors can be swapped
   * out, letting a single stream object be reused indefinitely. */
  private static class Buffer extends ByteArrayInputStream {
    public Buffer() {
      super(new byte[] {});
    }

    public void reset(byte[] input, int start, int length) {
      buf = input;
      pos = start;
      mark = start;
      count = start + length;
    }

    public byte[] getData() { return buf; }
    public int getPosition() { return pos; }
    public int getLength() { return count; }
  }

  private Buffer buffer;

  /** Constructs a new empty buffer. */
  public DataInputBuffer() {
    this(new Buffer());
  }

  private DataInputBuffer(Buffer buffer) {
    super(buffer);
    this.buffer = buffer;
  }

  /** Points this buffer at the first {@code length} bytes of {@code input}. */
  public void reset(byte[] input, int length) {
    buffer.reset(input, 0, length);
  }

  /** Points this buffer at {@code length} bytes of {@code input},
   * beginning at offset {@code start}. */
  public void reset(byte[] input, int start, int length) {
    buffer.reset(input, start, length);
  }

  /** Returns the backing byte array. */
  public byte[] getData() {
    return buffer.getData();
  }

  /** Returns the current position in the input. */
  public int getPosition() { return buffer.getPosition(); }

  /** Returns the length (end offset) of the input. */
  public int getLength() { return buffer.getLength(); }

}
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** A reusable {@link DataOutput} implementation that writes to an in-memory
 * buffer.
 *
 * <p>Reusing one instance avoids allocating a fresh DataOutputStream/
 * ByteArrayOutputStream pair for every record written:
 * <pre>
 *   DataOutputBuffer buffer = new DataOutputBuffer();
 *   while (... more records ...) {
 *     buffer.reset();
 *     ... write via DataOutput methods ...
 *     byte[] data = buffer.getData();   // valid up to buffer.getLength()
 *     ... ship data to its destination ...
 *   }
 * </pre>
 */
public class DataOutputBuffer extends DataOutputStream {

  /** ByteArrayOutputStream that exposes its raw array and count, and can
   * slurp bytes straight from a DataInput without an intermediate copy. */
  private static class Buffer extends ByteArrayOutputStream {
    public byte[] getData() { return buf; }
    public int getLength() { return count; }

    public Buffer() {
      super();
    }

    public Buffer(int size) {
      super(size);
    }

    public void write(DataInput in, int len) throws IOException {
      int target = count + len;
      if (target > buf.length) {
        // Grow geometrically, but always enough to hold the new bytes.
        byte[] grown = new byte[Math.max(buf.length << 1, target)];
        System.arraycopy(buf, 0, grown, 0, count);
        buf = grown;
      }
      in.readFully(buf, count, len);
      count = target;
    }
  }

  private Buffer buffer;

  /** Constructs a new empty buffer. */
  public DataOutputBuffer() {
    this(new Buffer());
  }

  /** Constructs a buffer with the given initial capacity. */
  public DataOutputBuffer(int size) {
    this(new Buffer(size));
  }

  private DataOutputBuffer(Buffer buffer) {
    super(buffer);
    this.buffer = buffer;
  }

  /** Returns the current contents of the buffer.
   * Data is only valid to {@link #getLength()}.
   */
  public byte[] getData() { return buffer.getData(); }

  /** Returns the length of the valid data currently in the buffer. */
  public int getLength() { return buffer.getLength(); }

  /** Empties the buffer and returns it for call chaining. */
  public DataOutputBuffer reset() {
    this.written = 0;  // keep DataOutputStream's byte counter in sync
    buffer.reset();
    return this;
  }

  /** Copies {@code length} bytes from a DataInput directly into the buffer. */
  public void write(DataInput in, int length) throws IOException {
    buffer.write(in, length);
  }

  /** Writes the buffered bytes to the given stream. */
  public void writeTo(OutputStream out) throws IOException {
    buffer.writeTo(out);
  }
}
|
|
@ -0,0 +1,199 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.serializer.Deserializer;
|
||||
import org.apache.hadoop.io.serializer.Serialization;
|
||||
import org.apache.hadoop.io.serializer.SerializationFactory;
|
||||
import org.apache.hadoop.io.serializer.Serializer;
|
||||
import org.apache.hadoop.util.GenericsUtil;
|
||||
|
||||
/**
|
||||
* DefaultStringifier is the default implementation of the {@link Stringifier}
|
||||
* interface which stringifies the objects using base64 encoding of the
|
||||
* serialized version of the objects. The {@link Serializer} and
|
||||
* {@link Deserializer} are obtained from the {@link SerializationFactory}.
|
||||
* <br>
|
||||
* DefaultStringifier offers convenience methods to store/load objects to/from
|
||||
* the configuration.
|
||||
*
|
||||
* @param <T> the class of the objects to stringify
|
||||
*/
|
||||
public class DefaultStringifier<T> implements Stringifier<T> {
|
||||
|
||||
private static final String SEPARATOR = ",";
|
||||
|
||||
private Serializer<T> serializer;
|
||||
|
||||
private Deserializer<T> deserializer;
|
||||
|
||||
private DataInputBuffer inBuf;
|
||||
|
||||
private DataOutputBuffer outBuf;
|
||||
|
||||
public DefaultStringifier(Configuration conf, Class<T> c) {
|
||||
|
||||
SerializationFactory factory = new SerializationFactory(conf);
|
||||
this.serializer = factory.getSerializer(c);
|
||||
this.deserializer = factory.getDeserializer(c);
|
||||
this.inBuf = new DataInputBuffer();
|
||||
this.outBuf = new DataOutputBuffer();
|
||||
try {
|
||||
serializer.open(outBuf);
|
||||
deserializer.open(inBuf);
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public T fromString(String str) throws IOException {
|
||||
try {
|
||||
byte[] bytes = Base64.decodeBase64(str.getBytes("UTF-8"));
|
||||
inBuf.reset(bytes, bytes.length);
|
||||
T restored = deserializer.deserialize(null);
|
||||
return restored;
|
||||
} catch (UnsupportedCharsetException ex) {
|
||||
throw new IOException(ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public String toString(T obj) throws IOException {
|
||||
outBuf.reset();
|
||||
serializer.serialize(obj);
|
||||
byte[] buf = new byte[outBuf.getLength()];
|
||||
System.arraycopy(outBuf.getData(), 0, buf, 0, buf.length);
|
||||
return new String(Base64.encodeBase64(buf));
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
inBuf.close();
|
||||
outBuf.close();
|
||||
deserializer.close();
|
||||
serializer.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the item in the configuration with the given keyName.
|
||||
*
|
||||
* @param <K> the class of the item
|
||||
* @param conf the configuration to store
|
||||
* @param item the object to be stored
|
||||
* @param keyName the name of the key to use
|
||||
* @throws IOException : forwards Exceptions from the underlying
|
||||
* {@link Serialization} classes.
|
||||
*/
|
||||
public static <K> void store(Configuration conf, K item, String keyName)
|
||||
throws IOException {
|
||||
|
||||
DefaultStringifier<K> stringifier = new DefaultStringifier<K>(conf,
|
||||
GenericsUtil.getClass(item));
|
||||
conf.set(keyName, stringifier.toString(item));
|
||||
stringifier.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Restores the object from the configuration.
|
||||
*
|
||||
* @param <K> the class of the item
|
||||
* @param conf the configuration to use
|
||||
* @param keyName the name of the key to use
|
||||
* @param itemClass the class of the item
|
||||
* @return restored object
|
||||
* @throws IOException : forwards Exceptions from the underlying
|
||||
* {@link Serialization} classes.
|
||||
*/
|
||||
public static <K> K load(Configuration conf, String keyName,
|
||||
Class<K> itemClass) throws IOException {
|
||||
DefaultStringifier<K> stringifier = new DefaultStringifier<K>(conf,
|
||||
itemClass);
|
||||
try {
|
||||
String itemStr = conf.get(keyName);
|
||||
return stringifier.fromString(itemStr);
|
||||
} finally {
|
||||
stringifier.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the array of items in the configuration with the given keyName.
|
||||
*
|
||||
* @param <K> the class of the item
|
||||
* @param conf the configuration to use
|
||||
* @param items the objects to be stored
|
||||
* @param keyName the name of the key to use
|
||||
* @throws IndexOutOfBoundsException if the items array is empty
|
||||
* @throws IOException : forwards Exceptions from the underlying
|
||||
* {@link Serialization} classes.
|
||||
*/
|
||||
public static <K> void storeArray(Configuration conf, K[] items,
|
||||
String keyName) throws IOException {
|
||||
|
||||
DefaultStringifier<K> stringifier = new DefaultStringifier<K>(conf,
|
||||
GenericsUtil.getClass(items[0]));
|
||||
try {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (K item : items) {
|
||||
builder.append(stringifier.toString(item)).append(SEPARATOR);
|
||||
}
|
||||
conf.set(keyName, builder.toString());
|
||||
}
|
||||
finally {
|
||||
stringifier.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Restores the array of objects from the configuration.
|
||||
*
|
||||
* @param <K> the class of the item
|
||||
* @param conf the configuration to use
|
||||
* @param keyName the name of the key to use
|
||||
* @param itemClass the class of the item
|
||||
* @return restored object
|
||||
* @throws IOException : forwards Exceptions from the underlying
|
||||
* {@link Serialization} classes.
|
||||
*/
|
||||
public static <K> K[] loadArray(Configuration conf, String keyName,
|
||||
Class<K> itemClass) throws IOException {
|
||||
DefaultStringifier<K> stringifier = new DefaultStringifier<K>(conf,
|
||||
itemClass);
|
||||
try {
|
||||
String itemStr = conf.get(keyName);
|
||||
ArrayList<K> list = new ArrayList<K>();
|
||||
String[] parts = itemStr.split(SEPARATOR);
|
||||
|
||||
for (String part : parts) {
|
||||
if (!part.equals(""))
|
||||
list.add(stringifier.fromString(part));
|
||||
}
|
||||
|
||||
return GenericsUtil.toArray(itemClass, list);
|
||||
}
|
||||
finally {
|
||||
stringifier.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Wrapper for {@link UTF8}.
 * This class should be used only when it is absolutely necessary
 * to use {@link UTF8}. The only difference is that using this class
 * does not require "@SuppressWarning" annotation to avoid javac warning.
 * Instead the deprecation is implied in the class name.
 */
@SuppressWarnings("deprecation")
public class DeprecatedUTF8 extends UTF8 {

  /** Construct an empty instance. */
  public DeprecatedUTF8() {
    super();
  }

  /** Construct from a given string. */
  public DeprecatedUTF8(String string) {
    super(string);
  }

  /** Copy constructor: construct from another DeprecatedUTF8 instance. */
  public DeprecatedUTF8(DeprecatedUTF8 utf8) {
    super(utf8);
  }

  /* The following two are the mostly commonly used methods.
   * wrapping them so that editors do not complain about the deprecation.
   */

  /** Delegates to the deprecated {@link UTF8#readString}. */
  public static String readString(DataInput in) throws IOException {
    return UTF8.readString(in);
  }

  /** Delegates to the deprecated {@link UTF8#writeString}. */
  public static int writeString(DataOutput out, String s) throws IOException {
    return UTF8.writeString(out, s);
  }
}
|
|
@ -0,0 +1,95 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Writable for Double values.
|
||||
*/
|
||||
public class DoubleWritable implements WritableComparable {
|
||||
|
||||
private double value = 0.0;
|
||||
|
||||
public DoubleWritable() {
|
||||
|
||||
}
|
||||
|
||||
public DoubleWritable(double value) {
|
||||
set(value);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
value = in.readDouble();
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeDouble(value);
|
||||
}
|
||||
|
||||
public void set(double value) { this.value = value; }
|
||||
|
||||
public double get() { return value; }
|
||||
|
||||
/**
|
||||
* Returns true iff <code>o</code> is a DoubleWritable with the same value.
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof DoubleWritable)) {
|
||||
return false;
|
||||
}
|
||||
DoubleWritable other = (DoubleWritable)o;
|
||||
return this.value == other.value;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return (int)Double.doubleToLongBits(value);
|
||||
}
|
||||
|
||||
public int compareTo(Object o) {
|
||||
DoubleWritable other = (DoubleWritable)o;
|
||||
return (value < other.value ? -1 : (value == other.value ? 0 : 1));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return Double.toString(value);
|
||||
}
|
||||
|
||||
/** A Comparator optimized for DoubleWritable. */
|
||||
public static class Comparator extends WritableComparator {
|
||||
public Comparator() {
|
||||
super(DoubleWritable.class);
|
||||
}
|
||||
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
double thisValue = readDouble(b1, s1);
|
||||
double thatValue = readDouble(b2, s2);
|
||||
return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
|
||||
}
|
||||
}
|
||||
|
||||
static { // register this comparator
|
||||
WritableComparator.define(DoubleWritable.class, new Comparator());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/** A Writable wrapper for EnumSet. */
|
||||
public class EnumSetWritable<E extends Enum<E>> implements Writable,
|
||||
Configurable {
|
||||
|
||||
private EnumSet<E> value;
|
||||
|
||||
private Class<E> elementType;
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
EnumSetWritable() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new EnumSetWritable. If the <tt>value</tt> argument is null or
|
||||
* its size is zero, the <tt>elementType</tt> argument must not be null. If
|
||||
* the argument <tt>value</tt>'s size is bigger than zero, the argument
|
||||
* <tt>elementType</tt> is not be used.
|
||||
*
|
||||
* @param value
|
||||
* @param elementType
|
||||
*/
|
||||
public EnumSetWritable(EnumSet<E> value, Class<E> elementType) {
|
||||
set(value, elementType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new EnumSetWritable. Argument <tt>value</tt> should not be null
|
||||
* or empty.
|
||||
*
|
||||
* @param value
|
||||
*/
|
||||
public EnumSetWritable(EnumSet<E> value) {
|
||||
this(value, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* reset the EnumSetWritable with specified
|
||||
* <tt>value</value> and <tt>elementType</tt>. If the <tt>value</tt> argument
|
||||
* is null or its size is zero, the <tt>elementType</tt> argument must not be
|
||||
* null. If the argument <tt>value</tt>'s size is bigger than zero, the
|
||||
* argument <tt>elementType</tt> is not be used.
|
||||
*
|
||||
* @param value
|
||||
* @param elementType
|
||||
*/
|
||||
public void set(EnumSet<E> value, Class<E> elementType) {
|
||||
if ((value == null || value.size() == 0)
|
||||
&& (this.elementType == null && elementType == null)) {
|
||||
throw new IllegalArgumentException(
|
||||
"The EnumSet argument is null, or is an empty set but with no elementType provided.");
|
||||
}
|
||||
this.value = value;
|
||||
if (value != null && value.size() > 0) {
|
||||
Iterator<E> iterator = value.iterator();
|
||||
this.elementType = iterator.next().getDeclaringClass();
|
||||
} else if (elementType != null) {
|
||||
this.elementType = elementType;
|
||||
}
|
||||
}
|
||||
|
||||
/** Return the value of this EnumSetWritable. */
|
||||
public EnumSet<E> get() {
|
||||
return value;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@SuppressWarnings("unchecked")
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
int length = in.readInt();
|
||||
if (length == -1)
|
||||
this.value = null;
|
||||
else if (length == 0) {
|
||||
this.elementType = (Class<E>) ObjectWritable.loadClass(conf,
|
||||
WritableUtils.readString(in));
|
||||
this.value = EnumSet.noneOf(this.elementType);
|
||||
} else {
|
||||
E first = (E) ObjectWritable.readObject(in, conf);
|
||||
this.value = (EnumSet<E>) EnumSet.of(first);
|
||||
for (int i = 1; i < length; i++)
|
||||
this.value.add((E) ObjectWritable.readObject(in, conf));
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void write(DataOutput out) throws IOException {
|
||||
if (this.value == null) {
|
||||
out.writeInt(-1);
|
||||
WritableUtils.writeString(out, this.elementType.getName());
|
||||
} else {
|
||||
Object[] array = this.value.toArray();
|
||||
int length = array.length;
|
||||
out.writeInt(length);
|
||||
if (length == 0) {
|
||||
if (this.elementType == null)
|
||||
throw new UnsupportedOperationException(
|
||||
"Unable to serialize empty EnumSet with no element type provided.");
|
||||
WritableUtils.writeString(out, this.elementType.getName());
|
||||
}
|
||||
for (int i = 0; i < length; i++) {
|
||||
ObjectWritable.writeObject(out, array[i], array[i].getClass(), conf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if <code>o</code> is an EnumSetWritable with the same value,
|
||||
* or both are null.
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if (o == null) {
|
||||
throw new IllegalArgumentException("null argument passed in equal().");
|
||||
}
|
||||
|
||||
if (!(o instanceof EnumSetWritable))
|
||||
return false;
|
||||
|
||||
EnumSetWritable<?> other = (EnumSetWritable<?>) o;
|
||||
|
||||
if (this == o || (this.value == other.value))
|
||||
return true;
|
||||
if (this.value == null) // other.value must not be null if we reach here
|
||||
return false;
|
||||
|
||||
return this.value.equals(other.value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the class of all the elements of the underlying EnumSetWriable. It
|
||||
* may return null.
|
||||
*
|
||||
* @return the element class
|
||||
*/
|
||||
public Class<E> getElementType() {
|
||||
return elementType;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int hashCode() {
|
||||
if (value == null)
|
||||
return 0;
|
||||
return (int) value.hashCode();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public String toString() {
|
||||
if (value == null)
|
||||
return "(null)";
|
||||
return value.toString();
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public Configuration getConf() {
|
||||
return this.conf;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
this.conf = conf;
|
||||
}
|
||||
|
||||
static {
|
||||
WritableFactories.setFactory(EnumSetWritable.class, new WritableFactory() {
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public Writable newInstance() {
|
||||
return new EnumSetWritable();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue